// Copyright (C) 1985-1998 by Symantec
// Copyright (C) 2000-2009 by Digital Mars
// All Rights Reserved
// http://www.digitalmars.com
// Written by Walter Bright
/*
 * This source file is made available for personal use
 * only. The license is in /dmd/src/dmd/backendlicense.txt
 * For any other uses, please contact Digital Mars.
 */


#if (SCPP || MARS) && !HTOD

#include	<stdio.h>
#include	<string.h>
#include	<time.h>

#include	"cc.h"
#include	"el.h"
#include	"go.h"
#include	"oper.h"
#include	"global.h"
#include	"type.h"

static char __file__[] = __FILE__;	/* for tassert.h		*/
#include	"tassert.h"

/*#define vec_copy(t,f) (dbg_printf("line %d\n",__LINE__),vec_copy((t),(f)))*/

extern mftype mfoptim;

struct Iv;

/*********************************
 * Loop data structure.
 */

struct loop
{   loop *Lnext;	// Next loop in list (startloop -> start of list)
    vec_t Lloop;	// Vector of blocks in this loop
    vec_t Lexit;	// Vector of exit blocks of loop
    block *Lhead;	// Pointer to header of loop
    block *Ltail;	// Pointer to tail
    block *Lpreheader;	// Pointer to preheader (if any)
    list_t Llis;	// loop invariant elems moved to Lpreheader, so
			// redundant temporaries aren't created
    Iv *Livlist;	// basic induction variables
    Iv *Lopeqlist;	// list of other op= variables
    void print();
    static loop *mycalloc();

    static loop *freelist;
};

struct famlist
{	elem **FLpelem;		/* parent of elem in the family		*/
	elem *c1,*c2;		/* c1*(basic IV) + c2			*/
#define FLELIM	((symbol *)-1)
	symbol *FLtemp;		// symbol index of temporary (FLELIM if	*/
				/* this entry has no temporary)		*/
	tym_t FLty;		/* type of this induction variable	*/
	tym_t FLivty;		/* type of the basic IV elem (which is	*/
				/* not necessarilly the type of the IV	*/
				/* elem!)				*/
	famlist *FLnext;	// next in list
	void print();

    static famlist *mycalloc();

    static famlist *freelist;
};

struct Iv
{
	symbol *IVbasic;	// symbol of basic IV
	elem **IVincr;		// pointer to parent of IV increment elem
	famlist *IVfamily;	// variables in this family
	Iv *IVnext;		// next iv in list
	void print();

    static Iv *mycalloc();

    static Iv *freelist;
};

STATIC void freeloop(loop **pl);
STATIC void buildloop(loop **pl, block *head, block *tail);
STATIC void insert(block *b , vec_t lv);
STATIC void movelis(elem *n,block *b,loop *l,int *pdomexit);
STATIC int looprotate(loop *l);
STATIC void markinvar(elem *n , vec_t rd);
STATIC bool refs(symbol *v , elem *n , elem *nstop);
STATIC void appendelem(elem *n , elem **pn);
STATIC void freeivlist(Iv *biv);
STATIC void unmarkall(elem *e);
void filterrd(vec_t f,vec_t rd,symbol *s);
STATIC void filterrdind(vec_t f,vec_t rd,elem *e);
STATIC famlist * simfl(famlist *fl , tym_t tym);
STATIC famlist * newfamlist(tym_t ty);
STATIC void loopiv(loop *l);
STATIC void findbasivs(loop *l);
STATIC void findopeqs(loop *l);
STATIC void findivfams(loop *l);
STATIC void ivfamelems(Iv *biv , elem **pn);
STATIC void elimfrivivs(loop *l);
STATIC void intronvars(loop *l);
STATIC bool funcprev(Iv *biv , famlist *fl);
STATIC void elimbasivs(loop *l);
STATIC void elimopeqs(loop *l);
STATIC famlist * flcmp(famlist *f1 , famlist *f2);
STATIC elem ** onlyref(symbol *x , loop *l , elem *incn , int *prefcount);
STATIC void countrefs(elem **pn , bool flag);
STATIC int countrefs2(elem *e);
STATIC void elimspec(loop *l);
STATIC void elimspecwalk(elem **pn);

static	bool addblk;			/* if TRUE, then we added a block */

/* is elem loop invariant?	*/
#define isLI(n) ((n)->Nflags & NFLli)

/* make elem loop invariant	*/
#define makeLI(n) ((n)->Nflags |= NFLli)

/******************************
 * UNAMBIG being defined means that:
 *	Only variables that could only be unambiguously defined
 *	are candidates for loop invariant removal and induction
 *	variables.
 *	This means only variables that have the SFLunambig flag
 *	set for them.
 *	Doing this will still cover 90% (I hope) of the cases, and
 *	is a lot faster to compute.
 */

#define UNAMBIG 1

/****************************
 */

#if !(TARGET_MAC)
/* These routines are not used for the Mac compiler build */


void famlist::print()
{
#ifdef DEBUG
    dbg_printf("famlist:\n");
    dbg_printf("*FLpelem:\n");
    elem_print(*FLpelem);
    dbg_printf("c1:");
    elem_print(c1);
    dbg_printf("c2:");
    elem_print(c2);
    dbg_printf("FLty = "); WRTYxx(FLty);
    dbg_printf("\nFLivty = "); WRTYxx(FLivty);
    dbg_printf("\n");
#endif
}


/****************************
 */

void Iv::print()
{
#ifdef DEBUG
    dbg_printf("IV: '%s'\n",IVbasic->Sident);
    dbg_printf("*IVincr:\n");
    elem_print(*IVincr);
#endif
}

#endif	/* !(TARGET_MAC) */

/***********************
 * Write loop.
 */


void loop::print()
{
#ifdef DEBUG
  loop *l = this;
  dbg_printf("loop %p, next = %p\n",l,(l) ? l->Lnext : (loop *) NULL);
  if (!l)
	return;
  dbg_printf("\thead: B%d, tail: B%d, prehead: B%d\n",l->Lhead->Bdfoidx,
	l->Ltail->Bdfoidx,(l->Lpreheader ) ? l->Lpreheader->Bdfoidx : 
							(unsigned)-1);
  dbg_printf("\tLloop "); vec_println(l->Lloop);
  dbg_printf("\tLexit "); vec_println(l->Lexit);
#endif
}

/***************************
 * Allocate loop.
 */

loop *loop::freelist = NULL;

loop *loop::mycalloc()
{   loop *l;

    if (freelist)
    {
	l = freelist;
	freelist = l->Lnext;
	memset(l,0,sizeof(loop));
    }
    else
	l = (loop *) mem_calloc(sizeof(loop));
    return l;
}

/*************
 * Free loops.
 */

STATIC void freeloop(loop **pl)
{ loop *ln;
  loop *l;

  for (l = *pl; l; l = ln)
  {	ln = l->Lnext;
	vec_free(l->Lloop);
	vec_free(l->Lexit);
	list_free(&l->Llis);
	l->Lnext = loop::freelist;
	loop::freelist = l;
  }
  *pl = NULL;
}

/**********************************
 * Initialize block information.
 * Returns:
 *	!=0	contains BCasm block
 */

int blockinit()
{ register unsigned i;
  register block *b;
  int hasasm = 0;

  assert(dfo);
  for (i = 0, b = startblock; b; i++, b = b->Bnext)
  {
#ifdef DEBUG			/* check integrity of Bpred and Bsucc	*/
	register list_t blp;

	for (blp = b->Bpred; blp; blp = list_next(blp))
	{	register list_t bls;

		for (bls = list_block(blp)->Bsucc; bls; bls = list_next(bls))
			if (list_block(bls) == b)
				goto L1;
		assert(0);
	    L1:	;
	}
#endif
	if (b->BC == BCasm)
	    hasasm = 1;
	;				/* compute number of blocks	*/
  }
  assert(numblks == i && maxblks);
  assert(i <= maxblks);
  for (i = 0; i < dfotop; i++)
  {	assert(dfo[i]->Bdfoidx == i);
	if (!dfo[i]->Bdom)
		dfo[i]->Bdom = vec_calloc(maxblks); /* alloc Bdom vectors */
  }
  return hasasm;
}

/****************************************
 * Compute dominators (Bdom) for each block.
 * See Aho & Ullman Fig. 13.5.
 * Note that flow graph is reducible if there is only one
 * pass through the loop.
 * Input:
 *	dfo[]
 * Output:
 *	fills in the Bdom vector for each block
 */

void compdom()
{ unsigned i;
  unsigned cntr;
  vec_t t1;
  list_t bl;
  bool chgs;
  block *sb;

  assert(dfo);
  sb = dfo[0];				// starting block
  t1 = vec_calloc(vec_numbits(sb->Bdom));	// allocate a temporary
  vec_clear(sb->Bdom);
  vec_setbit(0,sb->Bdom);		// starting block only doms itself
  for (i = 1; i < dfotop; i++)		// for all except startblock
	vec_set(dfo[i]->Bdom);		// dominate all blocks
  cntr = 0;				// # of times thru loop
  do
  {	chgs = FALSE;
	for (i = 1; i < dfotop; ++i)	// for each block in dfo[]
	{				// except startblock
		bl = dfo[i]->Bpred;
		if (bl)			// if there are predecessors
		{	vec_copy(t1,list_block(bl)->Bdom);
			while ((bl = list_next(bl)) != NULL)
			    vec_andass(t1,list_block(bl)->Bdom);
		}
		else
			vec_clear(t1);	// no predecessors to dominate
		vec_setbit(i,t1);	// each block doms itself
		if (chgs)
			vec_copy(dfo[i]->Bdom,t1);
		else if (!vec_equal(dfo[i]->Bdom,t1))	// if any changes
		{	vec_copy(dfo[i]->Bdom,t1);
			chgs = TRUE;
		}
	}
	cntr++;
	assert(cntr < 50);		// should have converged by now
  } while (chgs);
  vec_free(t1);
  if (cntr <= 2)
	cmes("Flow graph is reducible\n");
  else
	cmes("Flow graph is not reducible\n");
}

/***************************
 * Return !=0 if block A dominates block B.
 */

HINT dom(block *A,block *B)
{
  assert(A && B && dfo && dfo[A->Bdfoidx] == A);
  return vec_testbit(A->Bdfoidx,B->Bdom);
}

/**********************
 * Find all the loops.
 */

STATIC void findloops(loop **ploops)
{ unsigned i;
  list_t bl;
  block *b,*s;

  freeloop(ploops);

  //printf("findloops()\n");
  for (i = 0; i < dfotop; i++)
	dfo[i]->Bweight = 1;		/* reset Bweights		*/
  for (i = dfotop; i--;)		/* for each block (note reverse	*/
					/* dfo order, so most nested	*/
					/* loops are found first)	*/
  {	b = dfo[i];
	assert(b);
	for (bl = b->Bsucc; bl; bl = list_next(bl))
	{	s = list_block(bl);		/* each successor s to b */
		assert(s);
		if (dom(s,b))			/* if s dominates b	*/
		    buildloop(ploops,s,b);	// we found a loop
	}
  }

#ifdef DEBUG
  if (debugc)
  { loop *l;

    for (l = *ploops; l; l = l->Lnext)
    {
	l->print();
    }
  }
#endif
}

/********************************
 */

STATIC void loop_weight(block *b,int factor)
{
    // Be careful not to overflow
    if (b->Bweight < 0x10000)
	b->Bweight *= 10 * factor;
    else if (b->Bweight < 0x100000)
	b->Bweight *= 2 * factor;
    else
	b->Bweight += factor;
}

/*****************************
 * Construct natural loop.
 * Algorithm 13.1 from Aho & Ullman.
 * Note that head dom tail.
 */

STATIC void buildloop(loop **ploops,block *head,block *tail)
{ loop *l;
  unsigned i;
  list_t bl;

  //printf("buildloop()\n");
  /* See if this is part of an existing loop. If so, merge the two.	*/
  for (l = *ploops; l; l = l->Lnext)
	if (l->Lhead == head)		/* two loops with same header	*/
	{
	    vec_t v;

	    // Calculate loop contents separately so we get the Bweights
	    // done accurately.

	    v = vec_calloc(maxblks);
	    vec_setbit(head->Bdfoidx,v);
	    loop_weight(head,1);
	    insert(tail,v);

	    vec_orass(l->Lloop,v);	// merge into existing loop
	    vec_free(v);

	    vec_clear(l->Lexit);	// recompute exit blocks
	    goto L1;
	}

  /* Allocate loop entry	*/
  l = loop::mycalloc();
  l->Lnext = *ploops;
  *ploops = l;				// put l at beginning of list

  l->Lloop = vec_calloc(maxblks);	/* allocate loop bit vector	*/
  l->Lexit = vec_calloc(maxblks);	/* bit vector for exit blocks	*/
  l->Lhead = head;
  l->Ltail = tail;

  vec_setbit(head->Bdfoidx,l->Lloop);	/* add head to the loop		*/
  loop_weight(head,2);			// *20 usage for loop header

  insert(tail,l->Lloop);		/* insert tail in loop		*/

L1:
  /* Find all the exit blocks (those blocks with
   * successors outside the loop).
   */

  foreach (i,dfotop,l->Lloop)		/* for each block in this loop	*/
  {	if (dfo[i]->BC == BCret || dfo[i]->BC == BCretexp || dfo[i]->BC == BCexit)
		vec_setbit(i,l->Lexit); /* ret blocks are exit blocks */
	else
	{	for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
			if (!vec_testbit(list_block(bl)->Bdfoidx,l->Lloop))
			{	vec_setbit(i,l->Lexit);
				break;
			}
	}
  }

    /*	Find preheader, if any, to the loop.
	The preheader is a block that has only the head as a successor.
	All other predecessors of head must be inside the loop.
     */
    l->Lpreheader = NULL;
    for (bl = head->Bpred; bl; bl = list_next(bl))
    {   block *b = list_block(bl);

	if (!vec_testbit(b->Bdfoidx,l->Lloop))	/* if not in loop	*/
	{   if (l->Lpreheader)			/* if already one	*/
	    {   l->Lpreheader = NULL;		/* can only be one	*/
		break;
	    }
	    else
	    {	if (list_next(b->Bsucc))	// if more than 1 successor
		    break;			// b can't be a preheader
		l->Lpreheader = b;
	    }
	}
    }
}

/********************************
 * Support routine for buildloop().
 * Add a block b and all its predecessors to loop lv.
 */

STATIC void insert(register block *b, register vec_t lv)
{ register list_t bl;

  assert(b && lv);
  if (!vec_testbit(b->Bdfoidx,lv))	/* if block is not in loop	*/
  {	vec_setbit(b->Bdfoidx,lv);	/* add block to loop		*/
	loop_weight(b,1);		// *10 usage count
	for (bl = b->Bpred; bl; bl = list_next(bl))
	    insert(list_block(bl),lv);	/* insert all its predecessors	*/
  }
}

/**************************************
 * Perform loop rotations.
 * Loop starts as:
 *
 *	   prehead
 *	    |
 *	    v
 *	+->head---->
 *	|   |
 *	|   v
 *	|  body
 *	|   |
 *	|   v
 *	+--tail
 *
 * Two types are done:
 *	1) Header is moved to be past the tail.
 *
 *	   prehead
 *	    |
 *	+---+
 *	|
 *	|  body<-+
 *	|   |    |
 *	|   v    |
 *	|  tail  |
 *	|   |    |
 *	|   v    |
 *	+->head--+
 *	    |
 *	    v
 *
 *	2) Header is copied past the tail (done only if MFtime is set).
 *
 *	   prehead
 *	    |
 *	    v
 *	   head1-----+
 *	    |        |
 *	    v        |
 *	   body<--+  |
 *	    |     |  |
 *	    v     |  |
 *	   tail   |  |
 *	    |     |  |
 *	    v     |  |
 *	   head2--+  |
 *	    |        |
 *	    +--------+
 *	    v
 *
 * Input:
 *	Loop information (do not depend on the preheader information)
 * Output:
 *	Revised list of blocks, a new dfo and new loop information
 * Returns:
 *	TRUE need to recompute loop data
 */

STATIC int looprotate(loop *l)
{
    register    block *tail = l->Ltail;
    register    block *head = l->Lhead;
    register    block *b;

    //printf("looprotate(%p)\n",l);

    // Do not rotate loop if:
    if (head == tail ||				// loop is only one block big
	!vec_testbit(head->Bdfoidx,l->Lexit))	// header is not an exit block
	goto Lret;

    if (//iter != 1 &&
	vec_testbit(tail->Bdfoidx,l->Lexit))	// tail is an exit block
	goto Lret;

    // Do not rotate if already rotated
    for (b = tail->Bnext; b; b = b->Bnext)
	if (b == head)			// if loop already rotated
	    goto Lret;

#if SCPP
    if (head->BC == BCtry)
	 goto Lret;
#endif
    if (head->BC == BC_try)
	 goto Lret;
#ifdef DEBUG
    //if (debugc) { dbg_printf("looprotate: "); l->print(); }
#endif

    if (
#if !TX86
       // too costly overall - Slows compiler by 5%-10%,
       // excess code generated, little or no time saving
       0 && config.optimized &&
#endif
       (mfoptim & MFtime) && head->BC != BCswitch && head->BC != BCasm)
    {   // Duplicate the header past the tail (but doing
	// switches would be too expensive in terms of code
	// generated).
	register    block *head2;
	register    list_t bl, *pbl2, *pbl, *pbln;

	head2 = block_calloc();	// create new head block
	numblks++;			// number of blocks in existence
	head2->Btry = head->Btry;
#if 1
	head2->Bflags = head->Bflags;
	head->Bflags = BFLnomerg;	// move flags over to head2
	head2->Bflags |= BFLnomerg;
#else
	head->Bflags = 0;		// move flags over to head2
#endif
	head2->BC = head->BC;
	assert(head2->BC != BCswitch);
	if (head->Belem)		// copy expression tree
	    head2->Belem = el_copytree(head->Belem);
	head2->Bnext = tail->Bnext;
	tail->Bnext = head2;

	// pred(head1) = pred(head) outside loop
	// pred(head2) = pred(head) inside loop
	pbl2 = &(head2->Bpred);
	for (pbl = &(head->Bpred); *pbl; pbl = pbln)
	{
	    if (vec_testbit(list_block(*pbl)->Bdfoidx, l->Lloop))
	    {   // if this predecessor is inside the loop

		*pbl2 = *pbl;
		*pbl = list_next(*pbl);
		pbln = pbl;			// don't skip this next one
		list_next(*pbl2) = NULL;
		bl = list_block(*pbl2)->Bsucc;
		pbl2 = &(list_next(*pbl2));
		for (; bl; bl = list_next(bl))
		    if (list_block(bl) == head)
		    {
			list_ptr(bl) = (void *)head2;
			goto L2;
		    }
		assert(0);
	L2:     ;
	    }
	    else
		pbln = &(list_next(*pbl));	// next predecessor in list
	} // for each pred(head)

	// succ(head2) = succ(head)
	for (bl = head->Bsucc; bl; bl = list_next(bl))
	{
	    list_append(&(head2->Bsucc),list_block(bl));
	    list_append(&(list_block(bl)->Bpred),head2);
	}
	changes++;
	return TRUE;
    }
    else if (startblock != head
	    /* This screws up the OPctor/OPdtor sequence for:
	     *   struct CString
	     *   {   CString();
	     *      ~CString();
	     *      int GetLength();
	     *   };
	     *
	     *   void f(void)
	     *   {  for(;;)
	     *      {   CString s ;
	     *	  if(s.GetLength()!=0)
	     *	     break ;
	     *      }
	     *   }
	     */
	    && !(config.flags3 & CFG3eh)
	    )
    {   // optimize for space
	// Simply position the header past the tail
	for (b = startblock; b; b = b->Bnext)
	    if (b->Bnext == head)
		goto L1;		// found parent b of head
	assert(0);

    L1:
#if !TX86
	if (config.flags3 & CFG3eh)
	{
	    if (!(b->Bflags & BFLlooprt))
	    {
		b->Boldnext = b->Bnext;
		b->Bflags |= BFLlooprt;
	    }
	    if (!(head->Bflags & BFLlooprt))
	    {
		head->Boldnext = head->Bnext;
		head->Bflags |= BFLlooprt;
	    }
	}
#endif
	b->Bnext = head->Bnext;
	head->Bnext = tail->Bnext;
	tail->Bnext = head;
	cmes2( "Rotated loop %p\n", l);
	changes++;
    }
Lret:
    return FALSE;
}

static int gref;		// parameter for markinvar()
static block *gblock;		// parameter for markinvar()
static vec_t lv;		// parameter for markinvar()
static vec_t gin;		// parameter for markinvar()
static bool doflow;		// TRUE if flow analysis has to be redone

/*********************************
 * Loop invariant and induction variable elimination.
 * Input:
 *	iter	which optimization iteration we are on
 */

void loopopt()
{
    list_t bl;
    loop *l;
    loop *ln;
    vec_t rd;
    loop *startloop;

    cmes("loopopt()\n");
    startloop = NULL;
restart:
    file_progress();
    if (blockinit())			// init block data
    {
	findloops(&startloop);		// Compute Bweights
	freeloop(&startloop);		// free existing loops
	return;				// can't handle ASM blocks
    }
    compdom();				// compute dominators
    findloops(&startloop);		// find the loops

    for (l = startloop; l; l = ln)
    {
	ln = l->Lnext;
	if (looprotate(l))		// rotate the loop
	{
	    compdfo();
	    blockinit();
	    compdom();
	    findloops(&startloop);	// may trash l->Lnext
	    if (ln)
	    {	ln = startloop;		// start over
		file_progress();
	    }
	}
    }
    // Make sure there is a preheader for each loop.

    addblk = FALSE;			/* assume no blocks added	 */
    for (l = startloop; l; l = l->Lnext)/* for each loop		 */
    {
	T68000(file_progress();)
#ifdef DEBUG
	//if (debugc) l->print();
#endif
	if (!l->Lpreheader)		/* if no preheader		 */
	{   register block *h, *p;

	    cmes("Generating preheader for loop\n");
	    addblk = TRUE;		/* add one			 */
	    p = block_calloc();		// the preheader
	    numblks++;
	    assert (numblks <= maxblks);
	    h = l->Lhead;		/* loop header			 */

	    /* Find parent of h	*/
	    if (h == startblock)
		startblock = p;
	    else
	    {	register block *ph;

		for (ph = startblock; 1; ph = ph->Bnext)
		{   assert(ph);		/* should have found it		*/
		    if (ph->Bnext == h)
			    break;
		}
		/* Link p into block list between ph and h	*/
		ph->Bnext = p;
	    }
	    p->Bnext = h;

	    l->Lpreheader = p;
	    p->BC = BCgoto;
	    assert(p->Bsucc == NULL);
	    list_append(&(p->Bsucc),h);	/* only successor is h		*/
	    p->Btry = h->Btry;

	    cmes3("Adding preheader %p to loop %p\n",p,l);

	    // Move preds of h that aren't in the loop to preds of p
	    for (bl = h->Bpred; bl;)
	    {	register block *b = list_block(bl);

		if (!vec_testbit (b->Bdfoidx, l->Lloop))
		{   register list_t bls;

		    list_append(&(p->Bpred), b);
		    list_subtract(&(h->Bpred), b);
		    bl = h->Bpred;	/* dunno what subtract did	*/

		    /* Fix up successors of predecessors	*/
		    for (bls = b->Bsucc; bls; bls = list_next(bls))
			if (list_block(bls) == h)
				list_ptr(bls) = (void *)p;
		}
		else
		    bl = list_next(bl);
	    }
	    list_append(&(h->Bpred),p);	/* p is a predecessor to h	*/
	}
    } /* for */
    if (addblk)				/* if any blocks were added	 */
    {
	compdfo();			/* compute depth-first order	*/
	blockinit();
	compdom();
	findloops(&startloop);		// recompute block info
	addblk = FALSE;
    }

    /* Do the loop optimizations. Note that accessing the loops	*/
    /* starting from startloop will access them in least nested	*/
    /* one first, thus moving LIs out as far as possible.	*/

    doflow = TRUE;			/* do flow analysis		*/
    cmes("Starting loop invariants\n");

    for (l = startloop; l; l = l->Lnext)
    {	register unsigned i,j;

#ifdef DEBUG
	//if (debugc) l->print();
#endif
	file_progress();
	assert(l->Lpreheader);
	if (doflow)
	{
		flowrd();		/* compute reaching definitions	 */
		flowlv();		/* compute live variables	 */
		flowae();		// compute available expressions
		doflow = FALSE;		/* no need to redo it		*/
		if (deftop == 0)	/* if no definition elems	*/
			break;		/* no need to optimize		*/
	}
	lv = l->Lloop;
	cmes2("...Loop %p start...\n",l);

	/* Unmark all elems in this loop	 */
	foreach (i,dfotop,lv)
	    if (dfo[i]->Belem)
		unmarkall(dfo[i]->Belem);	/* unmark all elems	*/

	/* Find & mark all LIs	 */
	gin = vec_clone(l->Lpreheader->Bout);
	rd = vec_calloc(deftop);	/* allocate our running RD vector */
	foreach (i,dfotop,lv)		/* for each block in loop	*/
	{   block *b = dfo[i];

	    cmes2("B%d\n",i);
	    if (b->Belem)
	    {
		vec_copy(rd, b->Binrd);	// IN reaching defs
#if 0
		dbg_printf("i = %d\n",i);
		{   int j;
		    for (j = 0; j < deftop; j++)
			elem_print(defnod[j].DNelem);
		}
		dbg_printf("rd    : "); vec_println(rd);
#endif
		gblock = b;
		gref = 0;
		if (b != l->Lhead)
		    gref = 1;
		markinvar(b->Belem, rd);
#if 0
		dbg_printf("i = %d\n",i);
		{   int j;
		    for (j = 0; j < deftop; j++)
			elem_print(defnod[j].DNelem);
		}
		dbg_printf("rd    : "); vec_println(rd);
		dbg_printf("Boutrd: "); vec_println(b->Boutrd);
#endif
		assert(vec_equal(rd, b->Boutrd));
	    }
	    else
		assert(vec_equal(b->Binrd, b->Boutrd));
	}
	vec_free(rd);
	vec_free(gin);

	/* Move loop invariants	 */
	foreach (i,dfotop,lv)
	{
	    int domexit;		// TRUE if this block dominates all
					// exit blocks of the loop

	    foreach (j,dfotop,l->Lexit)	/* for each exit block	*/
	    {
		    if (!vec_testbit (i, dfo[j]->Bdom))
		    {   domexit = 0;
			goto L1;		// break if !(i dom j)
		    }
	    }
	    // if i dom (all exit blocks)
	    domexit = 1;
	L1:	;
	    if (dfo[i]->Belem)
	    {   // If there is any hope of making an improvement
		if (domexit || l->Llis)
		{   if (dfo[i] != l->Lhead)
			; //domexit |= 2;
		    movelis(dfo[i]->Belem, dfo[i], l, &domexit);
		}
	    }
	}
	//list_free(&l->Llis,FPNULL);
	cmes2("...Loop %p done...\n",l);

#if TX86
	if (mfoptim & MFliv)
#else
	if (config.optimized && (mfoptim & MFliv))
#endif
	{	loopiv(l);		/* induction variables		*/
		if (addblk)		/* if we added a block		*/
		{	compdfo();
			goto restart;	/* play it safe and start over	*/
		}
	}
    } /* for */
    freeloop(&startloop);
}

/*****************************
 * If elem is loop invariant, mark it.
 * Input:
 *	lv =	vector of all the blocks in this loop.
 *	rd =	vector of loop invariants for this elem. This must be
 *		continually updated.
 * Note that we do not iterate until no more LIs are found. The only
 * thing this would buy us is stuff that depends on LI assignments.
 */

STATIC void markinvar(elem *n,vec_t rd)
{ vec_t tmp;
  unsigned i;
  symbol *v;
  elem *n1;

  assert(n && rd);
  assert(vec_numbits(rd) == deftop);
  switch (n->Eoper)
  {
	case OPaddass:	case OPminass:	case OPmulass:	case OPandass:
	case OPorass:	case OPxorass:	case OPdivass:	case OPmodass:
	case OPshlass:	case OPshrass:	case OPashrass:
	case OPpostinc:	case OPpostdec:
	case OPcall:
			markinvar(n->E2,rd);
	case OPnegass:
			n1 = n->E1;
			if (n1->Eoper == OPind)
				markinvar(n1->E1,rd);
			else if (OTbinary(n1->Eoper))
			{   markinvar(n1->E1,rd);
			    markinvar(n1->E2,rd);
			}
	L2:
			if (n->Eoper == OPcall ||
			    gblock->Btry ||
			    !(n1->Eoper == OPvar &&
				symbol_isintab(n1->EV.sp.Vsym)))
			{
			    gref = 1;
			}

			updaterd(n,rd,NULL);
			break;

	case OPcallns:
		markinvar(n->E2,rd);
		markinvar(n->E1,rd);
		break;

#if TX86
	case OPstrcpy:
	case OPstrcat:
	case OPmemcpy:
	case OPmemset:
		markinvar(n->E2,rd);
		markinvar(n->E1,rd);
		updaterd(n,rd,NULL);
		break;
	case OPbtc:
	case OPbtr:
	case OPbts:
		markinvar(n->E1,rd);
		markinvar(n->E2,rd);
		updaterd(n,rd,NULL);
		break;
#endif
	case OPucall:
		markinvar(n->E1,rd);
		/* FALL-THROUGH */
	case OPasm:
		gref = 1;
		updaterd(n,rd,NULL);
		break;

	case OPucallns:
	case OPstrpar:
	case OPstrctor:
#if TX86
	case OPvoid:
	case OPstrlen:
	case OPinp:
#endif
#if TARGET_MAC
	case OPvptrfptr:
	case OPcvptrfptr:
#endif
		markinvar(n->E1,rd);
		break;
	case OPcond:
	case OPparam:
#if TX86
	case OPoutp:
	case OPstrcmp:
	case OPmemcmp:
	case OPbt:			// OPbt is like OPind, assume not LI
#endif
		markinvar(n->E1,rd);
		markinvar(n->E2,rd);
		break;
	case OPandand:
	case OPoror:
		markinvar(n->E1,rd);
		tmp = vec_clone(rd);
		markinvar(n->E2,tmp);
		vec_orass(rd,tmp);		/* rd |= tmp		*/
		vec_free(tmp);
		break;
	case OPcolon:
	case OPcolon2:
		tmp = vec_clone(rd);
		markinvar(n->E1,rd);
		markinvar(n->E2,tmp);
		vec_orass(rd,tmp);		/* rd |= tmp		*/
		vec_free(tmp);
		break;
	case OPaddr:		// mark addresses of OPvars as LI
		markinvar(n->E1,rd);
		if (n->E1->Eoper == OPvar || isLI(n->E1))
			makeLI(n);
		break;
	case OPmsw:
	case OPneg:	case OPbool:	case OPnot:	case OPcom:
	case OPshtlng:	case OPd_s32:	case OPs32_d:
	case OPdblint:	case OPs16_d:	case OPd_f:	case OPf_d:
	case OPlngsht:	case OPu8int:
	case OPld_d:	case OPd_ld:
	case OPld_u64:
	case OPc_r:	case OPc_i:
	case OParraylength:
	case OPnullcheck:
#if TX86
	case OPu16_32:
#else
	case OPunslng:
#endif
	case OPu16_d:	case OPdbluns:
	case OPs8int:	case OPint8:
        case OPd_u32:	case OPu32_d:
#if TARGET_68K
	case OPacos:
	case OPasin:
	case OPatan:
	case OPatanh:
	case OPcos:
	case OPcosh:
	case OPexp:
	case OPexp1:
	case OPexp10:
	case OPlog:
	case OPlog10:
	case OPlog2:
	case OPsin:
	case OPsincos:
	case OPsinh:
	case OPtan:
	case OPtanh:
#endif

#if LONGLONG
	case OPlngllng:	case OPulngllng:
	case OP64_32:
	case OPd_s64:	case OPd_u64:
	case OPs64_d:
	case OPu64_d:
#endif
#if !(TARGET_POWERPC)
	case OPabs:
	case OPsqrt:
	case OPrndtol:
	case OPsin:
	case OPcos:
	case OPrint:
#endif
#if TX86
	case OPvptrfptr: /* BUG for MacHandles */
	case OPtofar16:	case OPfromfar16: case OPoffset: case OPptrlptr: 
	case OPcvptrfptr:
	case OPsetjmp:
	case OPbsf:
	case OPbsr:
	case OPbswap:
#else
	case OPsfltdbl: case OPdblsflt:
#endif
		markinvar(n->E1,rd);
		if (isLI(n->E1))	/* if child is LI		*/
			makeLI(n);
		break;
	case OPeq:
	case OPstreq:
		markinvar(n->E2,rd);
		n1 = n->E1;
		markinvar(n1,rd);

		/* Determine if assignment is LI. Conditions are:	*/
		/* 1) Rvalue is LI					*/
		/* 2) Lvalue is a variable (simplifies things a lot)	*/
		/* 3) Lvalue can only be affected by unambiguous defs	*/
		/* 4) No rd's of lvalue that are within the loop (other	*/
		/*    than the current def)				*/
		if (isLI(n->E2) && n1->Eoper == OPvar)		/* 1 & 2 */
		{   v = n1->EV.sp.Vsym;
#if UNAMBIG
		    if (v->Sflags & SFLunambig)
#endif
		    {
			tmp = vec_calloc(deftop);
			//filterrd(tmp,rd,v);
			listrds(rd,n1,tmp);
			foreach (i,deftop,tmp)
			    if (defnod[i].DNelem != n &&
				vec_testbit(defnod[i].DNblock->Bdfoidx,lv))
				    goto L3;
			makeLI(n);	// then the def is LI
		    L3:	vec_free(tmp);
		    }
		}
		goto L2;

	case OPadd:	case OPmin:	case OPmul:	case OPand:
	case OPor:	case OPxor:	case OPdiv:	case OPmod:
	case OPshl:	case OPshr:	case OPeqeq:	case OPne:
	case OPlt:	case OPle:	case OPgt:	case OPge:
	case OPashr:

	case OPunord:	case OPlg:	case OPleg:	case OPule:
	case OPul:	case OPuge:	case OPug:	case OPue:
	case OPngt:	case OPnge:	case OPnlt:	case OPnle:
	case OPord:	case OPnlg:	case OPnleg:	case OPnule:
	case OPnul:	case OPnuge:	case OPnug:	case OPnue:

	case OPinstanceof:
	case OPfinalinstanceof:
	case OPcheckcast:
	case OPcomma:
	case OPpair:
	case OPrpair:
	case OPscale:
	case OPremquo:
	case OPyl2x:
	case OPyl2xp1:
		markinvar(n->E1,rd);
		markinvar(n->E2,rd);
		if (isLI(n->E2) && isLI(n->E1))
			makeLI(n);
		break;

	case OPind:			/* must assume this is not LI	*/
		markinvar(n->E1,rd);
		if (isLI(n->E1))
		{
#if 0
		    // This doesn't work with C++, because exp2_ptrtocomtype() will
		    // transfer const to where it doesn't belong.
		    if (n->Ety & mTYconst)
		    {
			makeLI(n);
		    }
#endif
#if 0
		    // This was disabled because it was marking as LI
		    // the loop dimension for the [i] array if
		    // a[j][i] was in a loop. This meant the a[j] array bounds
	 	    // check for the a[j].length was skipped.
		    else if (n->Ejty)
		    {
			tmp = vec_calloc(deftop);
			filterrdind(tmp,rd,n);	// only the RDs pertaining to n

			// if (no RDs within loop)
			//	then it's loop invariant

			foreach (i,deftop,tmp)		// for each RD
			    if (vec_testbit(defnod[i].DNblock->Bdfoidx,lv))
				goto L10;	// found a RD in the loop

			// If gref has occurred, this can still be LI
			// if n is an AE that was also an AE at the
			// point of gref.
			// We can catch a subset of these cases by looking
			// at the AEs at the start of the loop.
			if (gref)
			{   int j;

			    //printf("\tn is: "); WReqn(n); printf("\n");
			    foreach (j,exptop,gin)
			    {   elem *e = expnod[j];

				//printf("\t\texpnod[%d] = %p\n",j,e);
				//printf("\t\tAE is: "); WReqn(e); printf("\n");
				if (el_match2(n,e))
				{
				    makeLI(n);
				    //printf("Ind LI: "); WReqn(n); printf("\n");
				    break;
				}
			    }
			}
			else
			    makeLI(n);
		L10:	vec_free(tmp);
			break;
		    }
#endif
		}
		break;
	case OPvar:
		v = n->EV.sp.Vsym;
#if UNAMBIG
		if (v->Sflags & SFLunambig)	// must be unambiguous to be LI
#endif
		{
		    tmp = vec_calloc(deftop);
		    //filterrd(tmp,rd,v);	// only the RDs pertaining to v
		    listrds(rd,n,tmp);	// only the RDs pertaining to v

		    // if (no RDs within loop)
		    //	then it's loop invariant

		    foreach (i,deftop,tmp)		// for each RD
			if (vec_testbit(defnod[i].DNblock->Bdfoidx,lv))
			    goto L1;	// found a RD in the loop
		    makeLI(n);

		L1: vec_free(tmp);
		}
		break;
	case OPstring:
	case OPrelconst:
	case OPconst:			/* constants are always LI	*/
	case OPhstring:
	case OPframeptr:
		makeLI(n);
		break;
	case OPinfo:
		markinvar(n->E2,rd);
		break;

	case OPstrthis:
	case OPmark:
	case OPctor:
	case OPdtor:
	case OPhalt:
	case OPgot:			// shouldn't OPgot be makeLI ?
		break;

	default:
#ifdef DEBUG
		WROP(n->Eoper);
#endif
		//printf("n->Eoper = %d, OPconst = %d\n", n->Eoper, OPconst);
		assert(0);
  }
#ifdef DEBUG
  if (debugc && isLI(n))
  {	dbg_printf("  LI elem: ");
	WReqn(n);
	dbg_printf("\n");
  }
#endif
}

/********************
 * Update rd vector.
 * Input:
 *	n	assignment elem or function call elem or OPasm elem
 *	rd	reaching def vector to update
 *		(clear bits for defs we kill, set bit for n (which is the
 *		 def we are genning))
 *	vecdim	deftop
 */

void updaterd(elem *n,vec_t GEN,vec_t KILL)
{   unsigned op = n->Eoper;
    unsigned i;
    unsigned ni;
    elem *t;

    assert(OTdef(op));
    assert(GEN);
    elem_debug(n);

    // If unambiguous def
    if (OTassign(op) && (t = n->E1)->Eoper == OPvar)
    {	symbol *d = t->EV.sp.Vsym;
	targ_size_t toff = t->EV.sp.Voffset;
	targ_size_t tsize;
	targ_size_t ttop;

	tsize = (op == OPstreq) ? n->Enumbytes : tysize(t->Ety);
	ttop = toff + tsize;

	//printf("updaterd: "); WReqn(n); printf(" toff=%d, tsize=%d\n", toff, tsize);

	ni = (unsigned)-1;

	/* for all unambig defs in defnod[] */
	for (i = 0; i < deftop; i++)
	{   elem *tn = defnod[i].DNelem;
	    elem *tn1;
	    targ_size_t tn1size;

	    if (tn == n)
		ni = i;

	    if (!OTassign(tn->Eoper))
		continue;

	    // If def of same variable, kill that def
	    tn1 = tn->E1;
	    if (tn1->Eoper != OPvar || d != tn1->EV.sp.Vsym)
		continue;

	    // If t completely overlaps tn1
	    tn1size = (tn->Eoper == OPstreq)
		? tn->Enumbytes : tysize(tn1->Ety);
	    if (toff <= tn1->EV.sp.Voffset &&
		tn1->EV.sp.Voffset + tn1size <= ttop)
	    {
		if (KILL)
		    vec_setbit(i,KILL);
		vec_clearbit(i,GEN);
	    }
	}
	assert(ni != -1);
    }
#if 0
    else if (OTassign(op) && t->Eoper != OPvar && t->Ejty)
    {
	ni = -1;

	// for all unambig defs in defnod[]
	for (i = 0; i < deftop; i++)
	{   elem *tn = defnod[i].DNelem;
	    elem *tn1;

	    if (tn == n)
		ni = i;

	    if (!OTassign(tn->Eoper))
		continue;

	    // If def of same variable, kill that def
	    tn1 = tn->E1;
	    if (tn1->Eoper != OPind || t->Ejty != tn1->Ejty)
		continue;

	    if (KILL)
		vec_setbit(i,KILL);
	    vec_clearbit(i,GEN);
	}
	assert(ni != -1);
    }
#endif
    else
    {
	/* Set bit in GEN for this def */
	for (i = 0; 1; i++)
	{   assert(i < deftop);		// should find n in defnod[]
	    if (defnod[i].DNelem == n)
	    {   ni = i;
		break;
	    }
	}
    }

    vec_setbit(ni,GEN);			// set bit in GEN for this def
}

/***************************
 * Mark all elems as not being loop invariant.
 */

STATIC void unmarkall(elem *e)
{
  for (; 1; e = e->E1)
  {
	assert(e);
	e->Nflags &= ~NFLli;		/* unmark this elem		*/
	if (OTunary(e->Eoper))
		continue;
	else if (OTbinary(e->Eoper))
	{	unmarkall(e->E2);
		continue;
	}
	return;
  }
}

/*******************************
 * Take a RD vector and filter out all RDs but
 * ones that are defs of symbol s.
 * Output:
 *	f
 */

#if 0 // replaced by listrds()
void filterrd(vec_t f,vec_t rd,symbol *s)
{
  register unsigned i;
  register elem *n;

  vec_copy(f,rd);
#if UNAMBIG
  foreach (i,deftop,f)			/* for each def in f		*/
  {	n = defnod[i].DNelem;		/* the definition elem		*/
	elem_debug(n);
	if (n->Eoper == OPasm)		// OPasm defs always reach (sigh)
		continue;
	/* Clear bit if it's not an unambiguous def of si		*/
	if (OTassign(n->Eoper))		/* if assignment elem		*/
	{	if (!(n->E1->Eoper == OPvar && n->E1->EV.sp.Vsym == s
		   ))
			vec_clearbit(i,f);
	}
	else				/* else ambiguous def		*/
	    vec_clearbit(i,f);		// and couldn't def this var
  }
#else
  assert(0);				/* not implemented		*/
#endif
}
#endif

/*******************************
 * Take a RD vector and filter out all RDs but
 * ones that are possible defs of OPind elem e.
 * Output:
 *	f
 */

#if 0

STATIC void filterrdind(vec_t f,vec_t rd,elem *e)
{
    unsigned i;
    elem *n;
    tym_t jty = e->Ejty;

    vec_copy(f,rd);
#if UNAMBIG
    foreach (i,deftop,f)		// for each def in f
    {	n = defnod[i].DNelem;		// the definition elem
	elem_debug(n);
	if (n->Eoper == OPasm)		// OPasm defs always reach (sigh)
		continue;
	// Clear bit if it's not an unambiguous def of si
	if (OTassign(n->Eoper))		// if assignment elem
	{	elem *n1 = n->E1;

		if (n1->Eoper == OPind)
		{
		    if (jty && n1->Ejty && jty != n1->Ejty)
			vec_clearbit(i,f);
		}
		else if (n1->Eoper == OPvar)
		{
		    if (jty || n1->EV.sp.Vsym->Sflags & SFLunambig)
			vec_clearbit(i,f);
		}
	}
	else if (OTcall(n->Eoper) && el_noreturn(n))
	    vec_clearbit(i,f);
    }
#else
    assert(0);				// not implemented
#endif
}

#endif

/********************************
 * Return TRUE if there are any refs of v in n before nstop is encountered.
 * Input:
 *	refstop = -1
 */

static int refstop;			/* flag to stop refs()			*/

STATIC bool refs(symbol *v,elem *n,elem *nstop)
{ register bool f;
  register unsigned op;

  symbol_debug(v);
  elem_debug(n);
  assert(symbol_isintab(v));
  assert(v->Ssymnum < globsym.top);
  assert(n);

  op = n->Eoper;
#if UNAMBIG
  if (refstop == 0)
	return FALSE;
  f = FALSE;
  if (OTunary(op))
	f = refs(v,n->E1,nstop);
  else if (OTbinary(op))
  {	if (ERTOL(n))			/* watch order of evaluation	*/
	{
	    /* Note that (OPvar = e) is not a ref of OPvar, whereas	*/
	    /* ((OPbit OPvar) = e) is a ref of OPvar, and (OPvar op= e) is */
	    /* a ref of OPvar, etc.					*/
	    f = refs(v,n->E2,nstop);
	    if (!f)
	    {	if (op == OPeq)
		{	if (n->E1->Eoper != OPvar)
				f = refs(v,n->E1->E1,nstop);
		}
		else
			f = refs(v,n->E1,nstop);
	    }
	}
	else
		f = refs(v,n->E1,nstop) || refs(v,n->E2,nstop);
  }

  if (n == nstop)
	refstop = 0;
  else if (n->Eoper == OPvar)		/* if variable reference	*/
	return v == n->EV.sp.Vsym;
  else if (op == OPasm)			/* everything is referenced	*/
	return TRUE;
  return f;
#else
  assert(0);
#endif
}

/*************************
 * Move LIs to preheader.
 * Conditions to be satisfied for code motion are:
 *	1) All exit blocks are dominated (TRUE before this is called).
 *			-- OR --
 *	2) Variable assigned by a statement is not live on entering
 *	   any successor outside the loop of any exit block of the
 *	   loop.
 *
 *	3) Cannot move assignment to variable if there are any other
 *	   assignments to that variable within the loop (TRUE or
 *	   assignment would not have been marked LI).
 *	4) Cannot move assignments to a variable if there is a use
 *	   of that variable in this loop that is reached by any other
 *	   def of it.
 *	5) Cannot move expressions that have side effects.
 *	6) Do not move assignments to variables that could be affected
 *	   by ambiguous defs.
 *	7) It is not worth it to move expressions of the form:
 *		(var == const)
 * Input:
 *	n	the elem we're considering moving
 *	b	the block this elem is in
 *	l	the loop we're in
 *	domexit	flags
 *	bit 0:	1	this branch is always executed
 *		0	this branch is only sometimes executed
 *	bit 1:	1	do not move LIs that could throw exceptions
 *			or cannot be moved past possibly thrown exceptions
 * Returns:
 *	revised domexit
 */

STATIC void movelis(elem *n,block *b,loop *l,int *pdomexit)
{ register unsigned i,j,op;
  register vec_t tmp;
  register elem *ne,*t,*n2;
  register list_t nl;
  symbol *v;
  tym_t ty;
  tym_t jty;

Lnextlis:
  //if (isLI(n)) { printf("movelis("); WReqn(n); printf(")\n"); }
  assert(l && n);
  elem_debug(n);
  op = n->Eoper;
  switch (op)
  {
	case OPvar:
	case OPconst:
	case OPrelconst:
	    goto Lret;

	case OPandand:
	case OPoror:
	case OPcond:
	{   int domexit;

	    movelis(n->E1,b,l,pdomexit);	// always executed
	    domexit = *pdomexit & ~1;	// sometimes executed
	    movelis(n->E2,b,l,&domexit);
	    *pdomexit |= domexit & 2;
	    goto Lret;
	}

	case OPeq:
	    // Do loop invariant assignments
	    if (isLI(n) && n->E1->Eoper == OPvar)
	    {   v = n->E1->EV.sp.Vsym;		// variable index number

	#ifdef UNAMBIG
		if (!(v->Sflags & SFLunambig)) goto L3;		// case 6
	#endif

		// If case 4 is not satisfied, return

		// Function parameters have an implied definition prior to the
		// first block of the function. Unfortunately, the rd vector
		// does not take this into account. Therefore, we assume the
		// worst and reject assignments to function parameters.
		if (v->Sclass == SCparameter || v->Sclass == SCregpar
	#if TX86
		 || v->Sclass == SCfastpar
	#endif
		    )
			goto L3;

		if (el_sideeffect(n->E2)) goto L3;		// case 5

		// If case 1 or case 2 is not satisfied, return

		if (!(*pdomexit & 1))			// if not case 1
		{
		    foreach (i,dfotop,l->Lexit)		// for each exit block
		    {   register list_t bl;

			for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
			{   block *s;		// successor to exit block

			    s = list_block(bl);
			    if (!vec_testbit(s->Bdfoidx,l->Lloop) &&
				(!symbol_isintab(v) ||
				 vec_testbit(v->Ssymnum,s->Binlv))) // if v is live on exit
				    goto L3;
			}
		    }
		}

		tmp = vec_calloc(deftop);
		foreach (i,dfotop,l->Lloop)	// for each block in loop
		{
			if (dfo[i] == b)	// except this one
				continue;

			//<if there are any RDs of v in Binrd other than n>
			//	<if there are any refs of v in that block>
			//		return;

			//filterrd(tmp,dfo[i]->Binrd,v);
			listrds(dfo[i]->Binrd,n->E1,tmp);
			foreach (j,deftop,tmp)	// for each RD of v in Binrd
			{   if (defnod[j].DNelem == n)
					continue;
				refstop = -1;
				if (dfo[i]->Belem &&
				    refs(v,dfo[i]->Belem,(elem *)NULL)) //if refs of v
				{   vec_free(tmp);
					goto L3;
				}
				break;
			}
		} // foreach

		// <if there are any RDs of v in b->Binrd other than n>
		//	<if there are any references to v before the
		//	 assignment to v>
		//		<can't move this assignment>

		//filterrd(tmp,b->Binrd,v);
		listrds(b->Binrd,n->E1,tmp);
		foreach (j,deftop,tmp)		// for each RD of v in Binrd
		{   if (defnod[j].DNelem == n)
			    continue;
			refstop = -1;
			if (b->Belem && refs(v,b->Belem,n))
			{   vec_free(tmp);
			    goto L3;		// can't move it
			}
			break;			// avoid redundant looping
		}
		vec_free(tmp);

		// We have an LI assignment, n.
		// Check to see if the rvalue is already in the preheader.
		for (nl = l->Llis; nl; nl = list_next(nl))
		{
		    if (el_match(n->E2,list_elem(nl)->E2))
		    {
			el_free(n->E2);
			n->E2 = el_calloc();
			el_copy(n->E2,list_elem(nl)->E1);
			cmes("LI assignment rvalue was replaced\n");
			doflow = TRUE;
			changes++;
			break;
		    }
		}

		// move assignment elem to preheader
		cmes("Moved LI assignment ");
	#ifdef DEBUG
		if (debugc)
		{   WReqn(n);
			dbg_printf(";\n");
		}
	#endif
		changes++;
		doflow = TRUE;			// redo flow analysis
		ne = el_calloc();
		el_copy(ne,n);			// create assignment elem
		assert(l->Lpreheader);		// make sure there is one
		appendelem(ne,&(l->Lpreheader->Belem));	// append ne to preheader
		list_prepend(&l->Llis,ne);

		el_copy(n,ne->E1);	// replace n with just a reference to v
		goto Lret;
	    } // if
	    break;

	case OPcall:
	case OPucall:
	    *pdomexit |= 2;
	    break;
  }

L3:
  // Do leaves of non-LI expressions, leaves of = elems that didn't
  // meet the invariant assignment removal criteria, and don't do leaves
  if (OTleaf(op))
	goto Lret;
  if (!isLI(n) || op == OPeq || op == OPcomma || OTrel(op) || op == OPnot ||
      // These are usually addressing modes, so moving them is a net loss
      (I32 && op == OPshl && n->E2->Eoper == OPconst && el_tolong(n->E2) <= 3ull)
     )
  {
	if (OTassign(op))
	{	elem *n1 = n->E1;
		elem *n11;

		if (OTbinary(op))
		    movelis(n->E2,b,l,pdomexit);

		// Do lvalue only if it is an expression
		if (n1->Eoper == OPvar)
		    goto Lret;
		n11 = n1->E1;
		if (OTbinary(n1->Eoper))
		{
		    movelis(n11,b,l,pdomexit);
		    n = n1->E2;
		}
		// If *(x + c), just make x the LI, not the (x + c).
		// The +c comes free with the addressing mode.
		else if (n1->Eoper == OPind &&
			isLI(n11) &&
			n11->Eoper == OPadd &&
			n11->E2->Eoper == OPconst
			)
		{
		    n = n11->E1;
		}
		else
		    n = n11;
		movelis(n,b,l,pdomexit);
		if (b->Btry || !(n1->Eoper == OPvar && symbol_isintab(n1->EV.sp.Vsym)))
		{
		    //printf("assign to global => domexit |= 2\n");
		    *pdomexit |= 2;
		}
	}
	else if (OTunary(op))
	{   elem *e1 = n->E1;

	    // If *(x + c), just make x the LI, not the (x + c).
	    // The +c comes free with the addressing mode.
	    if (op == OPind &&
		isLI(e1) &&
		e1->Eoper == OPadd &&
		e1->E2->Eoper == OPconst
	       )
	    {
		n = e1->E1;
	    }
	    else
		n = e1;
	}
	else if (OTbinary(op))
	{	movelis(n->E1,b,l,pdomexit);
		n = n->E2;
	}
	goto Lnextlis;
  }

  if (el_sideeffect(n))
	goto Lret;

#if 0
printf("*pdomexit = %d\n",*pdomexit);
    if (*pdomexit & 2)
    {
	// If any indirections, can't LI it

	// If this operand has already been indirected, we can let
	// it pass.
	Symbol *s;

printf("looking at:\n");
elem_print(n);
	s = el_basesym(n->E1);
	if (s)
	{
	    for (nl = l->Llis; nl; nl = list_next(nl))
	    {   elem *el;
		tym_t ty2;

		el = list_elem(nl);
		el = el->E2;
elem_print(el);
		if (el->Eoper == OPind && el_basesym(el->E1) == s)
		{
printf("  pass!\n");
		    goto Lpass;
		}
	    }
	}
printf("  skip!\n");
	goto Lret;

    Lpass:
	;
    }
#endif

  // Move the LI expression to the preheader
  cmes("Moved LI expression ");
#ifdef DEBUG
  if (debugc)
  {	WReqn(n);
	dbg_printf(";\n");
  }
#endif

  // See if it's already been moved
  ty = n->Ety;
  for (nl = l->Llis; nl; nl = list_next(nl))
  {	elem *el;
	tym_t ty2;

	el = list_elem(nl);
	//printf("existing LI: "); WReqn(el); printf("\n");
	ty2 = el->E2->Ety;
	if (tysize(ty) == tysize(ty2))
	{   el->E2->Ety = ty;
	    if (el_match(n,el->E2))
	    {
		el->E2->Ety = ty2;
		if (!OTleaf(n->Eoper))
		{	el_free(n->E1);
			if (OTbinary(n->Eoper))
				el_free(n->E2);
		}
		el_copy(n,el->E1);	// make copy of temp
		n->Ety = ty;
#ifdef DEBUG
		if (debugc)
		{   dbg_printf("Already moved: LI expression replaced with ");
		    WReqn(n);
		    dbg_printf("\nPreheader %d expression %p ",
		    l->Lpreheader->Bdfoidx,l->Lpreheader->Belem);
		    WReqn(l->Lpreheader->Belem);
		    dbg_printf("\n");
		}
#endif
		changes++;
		doflow = TRUE;			// redo flow analysis
		goto Lret;
	    }
	    el->E2->Ety = ty2;
	}
  }

  if (!(*pdomexit & 1))				// if only sometimes executed
  {	cmes(" doesn't dominate exit\n");
	goto Lret;				// don't move LI
  }

  if (tyaggregate(n->Ety))
	goto Lret;

  changes++;
  doflow = TRUE;				// redo flow analysis

  t = el_alloctmp(n->Ety);			/* allocate temporary t	*/
#if DEBUG
    cmes2("movelis() introduced new variable '%s' of type ",t->EV.sp.Vsym->Sident);
    if (debugc) WRTYxx(t->Ety);
    cmes("\n");
#endif
  n2 = el_calloc();
  el_copy(n2,n);				/* create copy n2 of n	*/
  ne = el_bin(OPeq,t->Ety,t,n2);		/* create elem t=n2	*/
  assert(l->Lpreheader);			/* make sure there is one */
  appendelem(ne,&(l->Lpreheader->Belem));	/* append ne to preheader */
#ifdef DEBUG
  if (debugc)
  {	dbg_printf("Preheader %d expression %p\n\t",
	l->Lpreheader->Bdfoidx,l->Lpreheader->Belem);
	WReqn(l->Lpreheader->Belem);
	dbg_printf("\nLI expression replaced with "); WReqn(t);
	dbg_printf("\n");
  }
#endif
  el_copy(n,t);					/* replace this elem with t */

  // Remember LI expression in elem list
  list_prepend(&l->Llis,ne);

Lret:
    ;
}

/***************************
 * Append elem to existing elem using an OPcomma elem.
 * Input:
 *	n	elem to append
 *	*pn	elem to append to
 */

STATIC void appendelem(register elem *n,elem **pn)
{
  assert(n && pn);
  if (*pn)					/* if this elem exists	*/
  {	while ((*pn)->Eoper == OPcomma)		/* while we see OPcomma elems */
	{   (*pn)->Ety = n->Ety;
	    pn = &((*pn)->E2);			/* cruise down right side */
	}
	*pn = el_bin(OPcomma,n->Ety,*pn,n);
  }
  else
	*pn = n;				/* else create a elem	*/
}

/************** LOOP INDUCTION VARIABLES **********************/

/***************************
 * Allocate famlist.
 */

famlist *famlist::freelist = NULL;

famlist *famlist::mycalloc()
{   famlist *fl;

    if (freelist)
    {
	fl = freelist;
	freelist = fl->FLnext;
	memset(fl,0,sizeof(famlist));
    }
    else
	fl = (famlist *) mem_calloc(sizeof(famlist));
    return fl;
}

/***************************
 * Allocate Iv.
 */

Iv *Iv::freelist = NULL;

Iv *Iv::mycalloc()
{   Iv *iv;

    if (freelist)
    {
	iv = freelist;
	freelist = iv->IVnext;
	memset(iv,0,sizeof(Iv));
    }
    else
	iv = (Iv *) mem_calloc(sizeof(Iv));
    return iv;
}

/*********************
 * Free iv list.
 */

STATIC void freeivlist(register Iv *biv)
{ register Iv *bivnext;

#if TX86
  while (biv)
  {	register famlist *fl,*fln;

	for (fl = biv->IVfamily; fl; fl = fln)
	{	el_free(fl->c1);
		el_free(fl->c2);
		fln = fl->FLnext;

		fl->FLnext = famlist::freelist;
		famlist::freelist = fl;
	}
	bivnext = biv->IVnext;

	biv->IVnext = Iv::freelist;
	Iv::freelist = biv;

	biv = bivnext;
  }
#endif
}

/****************************
 * Create a new famlist entry.
 */

STATIC famlist * newfamlist(tym_t ty)
{	register famlist *fl;
	union eve c;

	memset(&c,0,sizeof(c));
	fl = famlist::mycalloc();
	fl->FLty = ty;
	switch (tybasic(ty))
	{   case TYfloat:
		c.Vfloat = 1;
		break;
	    case TYdouble:
	    case TYdouble_alias:
		c.Vdouble = 1;
		break;
	    case TYldouble:
#if TARGET_68K && __POWERPC
	    	c.Vldouble = Xone();
#else
	    	c.Vldouble = 1;
#endif
		break;
#if TARGET_68K && !__POWERPC
	    case TYcomp:
	    	c.Vldouble = 1;
		ty = TYldouble;
		break;
#endif
#if _MSDOS || __OS2__ || _WIN32		// if no byte ordering problems
	    case TYsptr:
	    case TYcptr:
#if JHANDLE
	    case TYjhandle:
#endif
	    case TYnptr:
	    case TYfptr:
	    case TYvptr:
		/* Convert pointers to integrals to avoid things like	*/
		/* multiplying pointers					*/
		ty = TYptrdiff;
		/* FALL-THROUGH */
	    default:
		c.Vlong = 1;
		break;
#if TX86
	    case TYhptr:
		ty = TYlong;
		c.Vlong = 1;
		break;
#endif
#else
	    case TYbool:
	    case TYchar:
	    case TYschar:
	    case TYuchar:
		c.Vchar = 1;
		break;
	    case TYshort:
	    case TYushort:
	    case TYwchar_t:		// BUG: what about 4 byte wchar_t's?
		c.Vshort = 1;
		break;
#if TX86
	    case TYsptr:
	    case TYcptr:
#if JHANDLE
	    case TYjhandle:
#endif
	    case TYnptr:
#endif
	    case TYfptr:
	    case TYvptr:
		ty = TYint;
		/* FALL-THROUGH */
	    case TYint:
	    case TYuint:
		c.Vint = 1;
		break;
#if TX86
	    case TYhptr:
		ty = TYlong;
#endif
	    case TYlong:
	    case TYulong:
	    default:
		c.Vlong = 1;
		break;
#if 0
	    default:
		printf("ty = x%x\n", tybasic(ty));
		assert(0);
#endif
#endif
	}
	fl->c1 = el_const(ty,&c);		/* c1 = 1		*/
	c.Vldouble = 0;
	if (typtr(ty))
	{
#if TX86
	    ty = (tybasic(ty) == TYhptr) ? TYlong : TYint;
#else
	    ty = TYint;
#endif
	}
	fl->c2 = el_const(ty,&c);		/* c2 = 0		*/
	return fl;
}

/***************************
 * Remove induction variables from loop l.
 * Loop invariant removal should have been done just previously.
 */

STATIC void loopiv(register loop *l)
{
  cmes2("loopiv(%p)\n",l);
  assert(l->Livlist == NULL && l->Lopeqlist == NULL);
  elimspec(l);
  if (doflow)
  {	flowrd();		/* compute reaching defs		*/
	flowlv();		/* compute live variables		*/
	flowae();		// compute available expressions
	doflow = FALSE;
  }
  findbasivs(l);		/* find basic induction variables	*/
  findopeqs(l);			// find op= variables
  findivfams(l);		/* find IV families			*/
  elimfrivivs(l);		/* eliminate less useful family IVs	*/
  intronvars(l);		/* introduce new variables		*/
  elimbasivs(l);		/* eliminate basic IVs			*/
  elimopeqs(l);			// eliminate op= variables

  freeivlist(l->Livlist);	// free up IV list
  l->Livlist = NULL;
  freeivlist(l->Lopeqlist);	// free up list
  l->Lopeqlist = NULL;

  /* Do copy propagation and dead assignment elimination	*/
  /* upon return to optfunc()					*/
}

/*************************************
 * Find basic IVs of loop l.
 * A basic IV x of loop l is a variable x which has
 * exactly one assignment within l of the form:
 * x += c or x -= c, where c is either a constant
 * or a LI.
 * Input:
 *	defnod[] loaded with all the definition elems of the loop
 */

STATIC void findbasivs(loop *l)
{ vec_t poss,notposs;
  elem *n;
  unsigned i,j;
  bool ambdone;

  assert(l);
  ambdone = FALSE;
  poss = vec_calloc(globsym.top);
  notposs = vec_calloc(globsym.top);		/* vector of all variables	*/
					/* (initially all unmarked)	*/

  /* for each def in defnod[] that is within loop l	*/

  for (i = 0; i < deftop; i++)
  {	if (!vec_testbit(defnod[i].DNblock->Bdfoidx,l->Lloop))
		continue;		/* def is not in the loop	*/

	n = defnod[i].DNelem;
	elem_debug(n);
	if (OTassign(n->Eoper) && n->E1->Eoper == OPvar)
	{   symbol *s;			/* if unambiguous def		*/

	    s = n->E1->EV.sp.Vsym;
	    if (symbol_isintab(s))
	    {
		SYMIDX v;

		v = n->E1->EV.sp.Vsym->Ssymnum;
		if ((n->Eoper == OPaddass || n->Eoper == OPminass ||
		     n->Eoper == OPpostinc || n->Eoper == OPpostdec) &&
			(cnst(n->E2) ||	/* if x += c or x -= c		*/
			 n->E2->Eoper == OPvar && isLI(n->E2)))
		{	if (vec_testbit(v,poss))
				/* We've already seen this def elem,	*/
				/* therefore there is more than one	*/
				/* def of v within the loop, therefore	*/
				/* v is not a basic IV.			*/
				vec_setbit(v,notposs);
			else
				vec_setbit(v,poss);
		}
		else			/* else mark as not possible	*/
			vec_setbit(v,notposs);
	    }
	}
	else				/* else ambiguous def		*/
	{	/* mark any vars that could be affected by		*/
		/* this def as not possible				*/

		if (!ambdone)		/* avoid redundant loops	*/
		{	for (j = 0; j < globsym.top; j++)
			{	if (!(globsym.tab[j]->Sflags & SFLunambig))
					vec_setbit(j,notposs);
			}
			ambdone = TRUE;
		}
	}
  }
#if 0
  dbg_printf("poss    "); vec_println(poss);
  dbg_printf("notposs "); vec_println(notposs);
#endif
  vec_subass(poss,notposs);		/* poss = poss - notposs	*/

  /* create list of IVs	*/
  foreach (i,globsym.top,poss)		/* for each basic IV		*/
  {	register Iv *biv;
	symbol *s;

	/* Skip if we don't want it to be a basic IV (see funcprev())	*/
	s = globsym.tab[i];
	assert(symbol_isintab(s));
	if (s->Sflags & SFLnotbasiciv)
		continue;

	// Do not use aggregates as basic IVs. This is because the other loop
	// code doesn't check offsets into symbols, (assuming everything
	// is at offset 0). We could perhaps amend this by allowing basic IVs
	// if the struct consists of only one data member.
	if (tyaggregate(s->ty()))
		continue;

	biv = Iv::mycalloc();
	biv->IVnext = l->Livlist;
	l->Livlist = biv;		// link into list of IVs

	biv->IVbasic = s;		// symbol of basic IV

	cmes3("Symbol '%s' (%d) is a basic IV, ",s->Sident
		? (char *)s->Sident : "",i);

	/* We have the sym idx of the basic IV. We need to find		*/
	/* the parent of the increment elem for it.			*/

	/* First find the defnod[]	*/
	for (j = 0; j < deftop; j++)
	{	/* If defnod is a def of i and it is in the loop	*/
		if (defnod[j].DNelem->E1 &&	/* OPasm are def nodes	*/
		    defnod[j].DNelem->E1->EV.sp.Vsym == s &&
		    vec_testbit(defnod[j].DNblock->Bdfoidx,l->Lloop))
			goto L1;
	}
	assert(0);			/* should have found it		*/
	/* NOTREACHED */

    L1:	biv->IVincr = el_parent(defnod[j].DNelem,&(defnod[j].DNblock->Belem));
	assert(s == (*biv->IVincr)->E1->EV.sp.Vsym);
#ifdef DEBUG
	if (debugc)
	{   dbg_printf("Increment elem is: "); WReqn(*biv->IVincr);	dbg_printf("\n"); }
#endif
  }

  vec_free(poss);
  vec_free(notposs);
}

/*************************************
 * Find op= elems of loop l.
 * Analogous to findbasivs().
 * Used to eliminate useless loop code normally found in benchmark programs.
 * Input:
 *	defnod[] loaded with all the definition elems of the loop
 */

STATIC void findopeqs(loop *l)
{   vec_t poss,notposs;
    elem *n;
    unsigned i,j;
    bool ambdone;

    assert(l);
    ambdone = FALSE;
    poss = vec_calloc(globsym.top);
    notposs = vec_calloc(globsym.top);	// vector of all variables
					// (initially all unmarked)

    // for each def in defnod[] that is within loop l

    for (i = 0; i < deftop; i++)
    {	if (!vec_testbit(defnod[i].DNblock->Bdfoidx,l->Lloop))
		continue;		// def is not in the loop

	n = defnod[i].DNelem;
	elem_debug(n);
	if (OTopeq(n->Eoper) && n->E1->Eoper == OPvar)
	{   symbol *s;			// if unambiguous def

	    s = n->E1->EV.sp.Vsym;
	    if (symbol_isintab(s))
	    {
		SYMIDX v;

		v = n->E1->EV.sp.Vsym->Ssymnum;
		{	if (vec_testbit(v,poss))
				// We've already seen this def elem,
				// therefore there is more than one
				// def of v within the loop, therefore
				// v is not a basic IV.
				vec_setbit(v,notposs);
			else
				vec_setbit(v,poss);
		}
	    }
	}
	else				// else ambiguous def
	{	// mark any vars that could be affected by
		// this def as not possible

		if (!ambdone)		// avoid redundant loops
		{	for (j = 0; j < globsym.top; j++)
			{	if (!(globsym.tab[j]->Sflags & SFLunambig))
					vec_setbit(j,notposs);
			}
			ambdone = TRUE;
		}
	}
    }

    // Don't use symbols already in Livlist
    for (Iv *iv = l->Livlist; iv; iv = iv->IVnext)
    {	symbol *s;

	s = iv->IVbasic;
	vec_setbit(s->Ssymnum,notposs);
    }


#if 0
    dbg_printf("poss    "); vec_println(poss);
    dbg_printf("notposs "); vec_println(notposs);
#endif
    vec_subass(poss,notposs);		// poss = poss - notposs

    // create list of IVs
    foreach (i,globsym.top,poss)	// for each opeq IV
    {	register Iv *biv;
	symbol *s;

	s = globsym.tab[i];
	assert(symbol_isintab(s));

	// Do not use aggregates as basic IVs. This is because the other loop
	// code doesn't check offsets into symbols, (assuming everything
	// is at offset 0). We could perhaps amend this by allowing basic IVs
	// if the struct consists of only one data member.
	if (tyaggregate(s->ty()))
		continue;

	biv = Iv::mycalloc();
	biv->IVnext = l->Lopeqlist;
	l->Lopeqlist = biv;		// link into list of IVs

	biv->IVbasic = s;		// symbol of basic IV

	cmes3("Symbol '%s' (%d) is an opeq IV, ",s->Sident
		? (char *)s->Sident : "",i);

	// We have the sym idx of the basic IV. We need to find
	// the parent of the increment elem for it.

	// First find the defnod[]
	for (j = 0; j < deftop; j++)
	{	// If defnod is a def of i and it is in the loop
		if (defnod[j].DNelem->E1 &&	// OPasm are def nodes
		    defnod[j].DNelem->E1->EV.sp.Vsym == s &&
		    vec_testbit(defnod[j].DNblock->Bdfoidx,l->Lloop))
			goto L1;
	}
	assert(0);			// should have found it
	// NOTREACHED

    L1:	biv->IVincr = el_parent(defnod[j].DNelem,&(defnod[j].DNblock->Belem));
	assert(s == (*biv->IVincr)->E1->EV.sp.Vsym);
#ifdef DEBUG
	if (debugc)
	{   dbg_printf("Opeq elem is: "); WReqn(*biv->IVincr);	dbg_printf("\n"); }
#endif
    Lcont:
	;
    }

    vec_free(poss);
    vec_free(notposs);
}

/*****************************
 * Find families for each basic IV.
 * An IV family is a list of elems of the form
 * c1*X+c2, where X is a basic induction variable.
 * Note that we do not do divides, because of roundoff error problems.
 */

STATIC void findivfams(register loop *l)
{ register Iv *biv;
  register unsigned i;
  register famlist *fl;

  cmes2("findivfams(%p)\n",l);
  for (biv = l->Livlist; biv; biv = biv->IVnext)
  {	foreach (i,dfotop,l->Lloop)	/* for each block in loop	*/
	    if (dfo[i]->Belem)
		ivfamelems(biv,&(dfo[i]->Belem));
	/* Fold all the constant expressions in c1 and c2.	*/
	for (fl = biv->IVfamily; fl; fl = fl->FLnext)
	{	fl->c1 = doptelem(fl->c1,GOALvalue | GOALagain);
		fl->c2 = doptelem(fl->c2,GOALvalue | GOALagain);
	}
  }
}

/*************************
 * Tree walking support routine for findivfams().
 *	biv =	basic induction variable pointer
 *	pn	pointer to elem
 */

STATIC void ivfamelems(register Iv *biv,register elem **pn)
{ register unsigned op;
  register tym_t ty,c2ty;
  register famlist *f;
  register elem *n,*n1,*n2;

  assert(pn);
  n = *pn;
  assert(biv && n);
  op = n->Eoper;
  if (OTunary(op))
  {	ivfamelems(biv,&n->E1);
	n1 = n->E1;
  }
  else if (OTbinary(op))
  {	ivfamelems(biv,&n->E1);
	ivfamelems(biv,&n->E2);	/* LTOR or RTOL order is unimportant */
	n1 = n->E1;
	n2 = n->E2;
  }
  else					/* else leaf elem		*/
	return;				/* which can't be in the family	*/

  if (op == OPmul || op == OPadd || op == OPmin ||
	op == OPneg || op == OPshl)
  {	/* Note that we are wimping out and not considering		*/
	/* LI variables as part of c1 and c2, but only constants.	*/

	ty = n->Ety;

	/* Since trees are canonicalized, basic induction variables	*/
	/* will only appear on the left.				*/

	/* Improvement:							*/
	/* We wish to pick up the cases (biv + li), (biv - li) and	*/
	/* (li + biv). OPmul and LS with bivs are out, since if we	*/
	/* try to eliminate the biv, and the loop test is a >, >=,	*/
	/* <, <=, we have a problem since we don't know if the li	*/
	/* is negative. (Would have to call swaprel() on it.)		*/

	/* If we have (li + var), swap the leaves.			*/
	if (op == OPadd && isLI(n1) && n1->Eoper == OPvar && n2->Eoper == OPvar)
	{	n->E1 = n2;
		n2 = n->E2 = n1;
		n1 = n->E1;
	}

	// Get rid of case where we painted a far pointer to a long
	if (op == OPadd || op == OPmin)
	{   int sz;

	    sz = tysize(ty);
	    if (sz == tysize[TYfptr] && !tyfv(ty) &&
		(sz != tysize(n1->Ety) || sz != tysize(n2->Ety)))
		return;
	}

	/* Look for function of basic IV (-biv or biv op const)		*/
	if (n1->Eoper == OPvar && n1->EV.sp.Vsym == biv->IVbasic)
	{	if (op == OPneg)
		{	register famlist *fl;

			cmes2("found (-biv), elem %p\n",n);
			fl = newfamlist(ty);
			fl->FLivty = n1->Ety;
			fl->FLpelem = pn;
			fl->FLnext = biv->IVfamily;
			biv->IVfamily = fl;
			fl->c1 = el_una(op,ty,fl->c1); /* c1 = -1	*/
		}
		else if (n2->Eoper == OPconst ||
			 isLI(n2) && (op == OPadd || op == OPmin))
		{	register famlist *fl;

#ifdef DEBUG
			if (debugc)
			{	dbg_printf("found (biv op const), elem (");
				WReqn(n);
				dbg_printf(");\n");
				dbg_printf("Types: n1="); WRTYxx(n1->Ety);
				dbg_printf(" ty="); WRTYxx(ty);
				dbg_printf(" n2="); WRTYxx(n2->Ety);
				dbg_printf("\n");
			}
#endif
			fl = newfamlist(ty);
			fl->FLivty = n1->Ety;
			fl->FLpelem = pn;
			fl->FLnext = biv->IVfamily;
			biv->IVfamily = fl;
			switch (op)
			{ case OPadd:		/* c2 = right		*/
				c2ty = n2->Ety;
				if (typtr(fl->c2->Ety))
					c2ty = fl->c2->Ety;
				goto L1;
			  case OPmin:		/* c2 = -right		*/
				c2ty = fl->c2->Ety;
				/* Check for subtracting two pointers */
				if (typtr(c2ty) && typtr(n2->Ety))
				{
#if TX86
				    if (tybasic(c2ty) == TYhptr)
					c2ty = TYlong;
				    else
#endif
					c2ty = TYint;
				}
			  L1:
				fl->c2 = el_bin(op,c2ty,fl->c2,el_copytree(n2));
				break;
			  case OPmul:		/* c1 = right		*/
			  case OPshl:		/* c1 = 1 << right	*/
				fl->c1 = el_bin(op,ty,fl->c1,el_copytree(n2));
				break;
			  default:
				assert(0);
			}
		}
	}

	/* Look for function of existing IV				*/

	for (f = biv->IVfamily; f; f = f->FLnext)
	{	if (*f->FLpelem != n1)		/* not it		*/
			continue;

		/* Look for (f op constant)	*/
		if (op == OPneg)
		{
			cmes2("found (-f), elem %p\n",n);
			/* c1 = -c1; c2 = -c2; */
			f->c1 = el_una(OPneg,ty,f->c1);
			f->c2 = el_una(OPneg,ty,f->c2);
			f->FLty = ty;
			f->FLpelem = pn;	/* replace with new IV	*/

		}
		else if (n2->Eoper == OPconst ||
			 isLI(n2) && (op == OPadd || op == OPmin))
		{
#ifdef DEBUG
			if (debugc)
			{	dbg_printf("found (f op const), elem (");
				WReqn(n);
				assert(*pn == n);
				dbg_printf(");\n");
				elem_print(n);
			}
#endif
			switch (op)
			{   case OPmul:
			    case OPshl:
				f->c1 = el_bin(op,ty,f->c1,el_copytree(n2));
				break;
			    case OPadd:
			    case OPmin:
				break;
			    default:
				assert(0);
			}
			f->c2 = el_bin(op,ty,f->c2,el_copytree(n2));
			f->FLty = ty;
			f->FLpelem = pn;	/* replace with new IV	*/
		} /* else if */
	} /* for */
  } /* if */
}

/*********************************
 * Eliminate frivolous family ivs, that is,
 * if we can't eliminate the BIV, then eliminate family ivs that
 * differ from it only by a constant.
 */

STATIC void elimfrivivs(loop *l)
{   Iv *biv;

    for (biv = l->Livlist; biv; biv = biv->IVnext)
    {	int nfams;
	famlist *fl;
	int nrefs;

cmes("elimfrivivs()\n");
	/* Compute number of family ivs for biv	*/
	nfams = 0;
	for (fl = biv->IVfamily; fl; fl = fl->FLnext)
		nfams++;
cmes2("nfams = %d\n",nfams);

	/* Compute number of references to biv	*/
	if (onlyref(biv->IVbasic,l,*biv->IVincr,&nrefs))
		nrefs--;
cmes2("nrefs = %d\n",nrefs);
	assert(nrefs + 1 >= nfams);
	if (nrefs > nfams ||		// if we won't eliminate the biv
	    (I32 && nrefs == nfams))
	{   /* Eliminate any family ivs that only differ by a constant	*/
	    /* from biv							*/
	    for (fl = biv->IVfamily; fl; fl = fl->FLnext)
	    {	elem *ec1 = fl->c1;
		targ_llong c;

		if (elemisone(ec1)
#if TX86
			||
		    // Eliminate fl's that can be represented by
		    // an addressing mode
		    (I32 && ec1->Eoper == OPconst && tyintegral(ec1->Ety) &&
		     ((c = el_tolong(ec1)) == 2 || c == 4 || c == 8)
		    )
#endif
		   )
		{	fl->FLtemp = FLELIM;
#ifdef DEBUG
			if (debugc)
			{	dbg_printf("Eliminating frivolous IV ");
				WReqn(*fl->FLpelem);
				dbg_printf("\n");
			}
#endif
		}
	    }
	}
    }
}

#if !TX86
// input: IV tree, original IV symbol
// output: offset into original IV symbol. 
//
// the purpose is to keep track of offsets into integer after the following rewite:
//
//	int to short|char		
//		|
//		|		=>	
//	       int				short | char
//	       	|				      |
//	       	i				     i + 2 | 3
//
//	i.e. we do not want the 2 | 3 to be lost int the process of creating a TMP IV
//	
//
static IVSymFound;
static int FindIVOffset(elem * e, symbol *s)
{
    int   offset;
    assert(e);
    assert(s);
     
	if (EOP(e))
	{
		offset = FindIVOffset(e->E1, s);
		if (IVSymFound)
			return(offset);
			
		if (EBIN(e))
		{
			e = e->E2;
			offset = FindIVOffset(e, s);
			if (IVSymFound)
				return(offset);
		}
		
		assert(0);	// symbol should be found somewhere
		return(0);
			
	}
	
	if (e->Eoper == OPvar && e->EV.sp.Vsym == s)
	{
		IVSymFound = TRUE;
		return (e->Eoffset);
	}
	
	return(0);		
}
#endif


/******************************
 * Introduce new variables.
 */

STATIC void intronvars(loop *l)
{
    famlist *fl;
    Iv *biv;
    elem *T, *ne, *t2, *C2, *cmul;
    tym_t ty,tyr;

    cmes2("intronvars(%p)\n",l);
    for (biv = l->Livlist; biv; biv = biv->IVnext)	// for each basic IV
    {	register elem *bivinc = *biv->IVincr;	/* ptr to increment elem */

	for (fl = biv->IVfamily; fl; fl = fl->FLnext)
	{				/* for each IV in family of biv	 */
	    if (fl->FLtemp == FLELIM)	/* if already eliminated	 */
		continue;

	    /* If induction variable can be written as a simple function */
	    /* of a previous induction variable, skip it.		 */
	    if (funcprev(biv,fl))
		continue;

#if TARGET_POWERPC
 extern INT32 numbitsset(UINT32);

	    if (!fl->c1) {
	    	fl->FLtemp = FLELIM;
	    	continue;
	    	
	    }




	    if (cnst(fl->c1))
	    {	
	    	elem * ec1 = fl->c1;
	    	if (tyfloating(tybasic(ec1->Ety))) 
	    		goto create_temp;
	    		
		{
	    	targ_llong v = el_tolong(ec1);
		if (numbitsset((UINT32) v) != 1)
		    goto create_temp;
	
		fl->FLtemp = FLELIM;
			
		continue;
	    	}
	    }

create_temp:

#endif

	    ty = fl->FLty;
	    T = el_alloctmp(ty);	/* allocate temporary T		 */
	    fl->FLtemp = T->EV.sp.Vsym;
#if DEBUG
	    cmes2("intronvars() introduced new variable '%s' of type ",T->EV.sp.Vsym->Sident);
	    if (debugc) WRTYxx(ty);
	    cmes("\n");
#endif

	    /* append elem T=biv*C1+C2 to preheader */
	    /* ne = biv*C1	*/
	    tyr = fl->FLivty;			/* type of biv		    */
	    ne = el_var(biv->IVbasic);
#if !TX86	    
	    {
	    	// see comment for FindIVOffset
		IVSymFound = FALSE;
	    	ne->Eoffset = FindIVOffset((*fl->FLpelem), biv->IVbasic);
	    }
#endif
	    ne->Ety = tyr;
	    if (!elemisone(fl->c1))		/* don't multiply ptrs by 1 */
		ne = el_bin(OPmul,tyr,ne,el_copytree(fl->c1));
	    if (tyfv(tyr) && tysize(ty) == SHORTSIZE)
		ne = el_una(OPlngsht,ty,ne);
	    C2 = el_copytree(fl->c2);
	    t2 = el_bin(OPadd,ty,ne,C2);	/* t2 = ne + C2		*/
	    ne = el_bin(OPeq,ty,el_copytree(T),t2);
	    appendelem(ne, &(l->Lpreheader->Belem));

	    /* prefix T+=C1*C to elem biv+=C				*/
	    /* Must prefix in case the value of the expression (biv+=C)	*/
	    /* is used by somebody up the tree.				*/
	    cmul = el_bin(OPmul,fl->c1->Ety,el_copytree(fl->c1),
				 el_copytree(bivinc->E2));
	    t2 = el_bin(bivinc->Eoper,ty,el_copytree(T),cmul);
	    t2 = doptelem(t2,GOALvalue | GOALagain);
	    *biv->IVincr = el_bin(OPcomma,bivinc->Ety,t2,bivinc);
	    biv->IVincr = &((*biv->IVincr)->E2);
#ifdef DEBUG
	    if (debugc)
	    {	dbg_printf("Replacing elem (");
		WReqn(*fl->FLpelem);
		dbg_printf(") with '%s'\n",T->EV.sp.Vsym->Sident);
		dbg_printf("The init elem is (");
		WReqn(ne);
		dbg_printf(");\nThe increment elem is (");
		WReqn(t2);
		dbg_printf(")\n");
	    }
#endif
	    el_free(*fl->FLpelem);
	    *fl->FLpelem = T;		/* replace elem n with ref to T	 */
	    doflow = TRUE;		/* redo flow analysis		*/
	    changes++;
	} /* for */
    } /* for */
}

/*******************************
 * Determine if induction variable can be rewritten as a simple
 * function of a previously generated temporary.
 * This can frequently
 * generate less code than that of an all-new temporary (especially
 * if it is the same as a previous temporary!).
 * Input:
 *	biv		Basic induction variable
 *	fl		Item in biv's family list we are looking at
 * Returns:
 *	FALSE		Caller should create a new induction variable.
 *	TRUE		*FLpelem is replaced with function of a previous
 *			induction variable. FLtemp is set to FLELIM to
 *			indicate this.
 */

STATIC bool funcprev(Iv *biv,famlist *fl)
{   tym_t tymin;
    int sz;
    famlist *fls;
    elem *e1,*e2,*flse1;

#ifdef DEBUG
    if (debugc)
	dbg_printf("funcprev\n");
#endif
    for (fls = biv->IVfamily; fls != fl; fls = fls->FLnext)
    {	assert(fls);			/* fl must be in list		*/
	if (fls->FLtemp == FLELIM)	/* no iv we can use here	*/
		continue;

	/* The multipliers must match	*/
	if (!el_match(fls->c1,fl->c1))
		continue;

	/* If the c2's match also, we got it easy */
	if (el_match(fls->c2,fl->c2))
	{
#if TX86
		if (tysize(fl->FLty) > tysize(fls->FLtemp->ty()))
		    continue;		/* can't increase size of var	*/
#endif
		flse1 = el_var(fls->FLtemp);
#if TARGET_MAC
		if (tysize(fl->FLty) > tysize(flse1->Ety))
		    goto L1;		/* can't increase size of var	*/
#endif
		flse1->Ety = fl->FLty;
		goto L2;
	}

	/* The difference is only in the addition. Therefore, replace
	   *fl->FLpelem with:
		case 1:		(fl->c2 + (fls->FLtemp - fls->c2))
		case 2:		(fls->FLtemp + (fl->c2 - fls->c2))
	 */
	e1 = fl->c2;
#if TX86
	/* Subtracting relocatables usually generates slow code for	*/
	/* linkers that can't handle arithmetic on relocatables.	*/
	if (typtr(fls->c2->Ety))
	{   if (fls->c2->Eoper == OPrelconst &&
		!(fl->c2->Eoper == OPrelconst &&
		  fl->c2->EV.sp.Vsym == fls->c2->EV.sp.Vsym)
	       )
		continue;
	}
#endif
	flse1 = el_var(fls->FLtemp);
	e2 = flse1;				/* assume case 1	*/
	tymin = e2->Ety;
	if (typtr(fls->c2->Ety))
	{	if (!typtr(tymin))
		{   if (typtr(e1->Ety))
		    {	e1 = e2;
			e2 = fl->c2;		/* case 2		*/
		    }
		    else			/* can't subtract fptr	*/
			goto L1;
		}
#if TX86
		if (tybasic(fls->c2->Ety) == TYhptr)
		    tymin = TYlong;
		else
#endif
		    tymin = TYint;		/* type of (ptr - ptr) */
	}

#if TX86
	/* If e1 and fls->c2 are fptrs, and are not from the same	*/
	/* segment, we cannot subtract them.				*/
	if (tyfv(e1->Ety) && tyfv(fls->c2->Ety))
	{   if (e1->Eoper != OPrelconst || fls->c2->Eoper != OPrelconst)
		goto L1;		/* assume expressions have diff segs */
	    if (e1->EV.sp.Vsym->Sclass != fls->c2->EV.sp.Vsym->Sclass)
		{ L1:
		    el_free(flse1);
		    continue;
		}
	}
#else
L1:
	el_free(flse1);
        continue;

#endif
	/* Some more type checking...	*/
	sz = tysize(fl->FLty);
	if (sz != tysize(e1->Ety) &&
	    sz != tysize(tymin))
	    goto L1;

	/* Do some type checking (can't add pointers and get a pointer!) */
#if TX86
	//if (typtr(fl->FLty) && typtr(e1->Ety) && typtr(tymin))
	    //goto L1;
#else
	assert(!(typtr(fl->FLty) && typtr(e1->Ety) && typtr(tymin)));
#endif
	/* Construct (e1 + (e2 - fls->c2))	*/
	flse1 = el_bin(OPadd,fl->FLty,
			    e1,
			    el_bin(OPmin,tymin,
				    e2,
				    el_copytree(fls->c2)));
#if TX86
	if (sz < tysize(tymin) && sz == tysize(e1->Ety))
	    flse1->E2 = el_una(OPoffset,fl->FLty,flse1->E2);
#endif

	flse1 = doptelem(flse1,GOALvalue | GOALagain);
	fl->c2 = NULL;
    L2:
#ifdef DEBUG
	if (debugc)
	{	dbg_printf("Replacing ");
		WReqn(*fl->FLpelem);
		dbg_printf(" with ");
		WReqn(flse1);
		dbg_printf("\n");
	}
#endif
	el_free(*fl->FLpelem);
	*fl->FLpelem = flse1;

	/* Fix the iv so when we do loops again, we won't create	*/
	/* yet another iv, which is just what funcprev() is supposed	*/
	/* to prevent.							*/
	fls->FLtemp->Sflags |= SFLnotbasiciv;

	fl->FLtemp = FLELIM;		/* mark iv as being gone	*/
	changes++;
	doflow = TRUE;
	return TRUE;			/* it was replaced		*/
    }
    return FALSE;			/* need to create a new variable */
}

/***********************
 * Eliminate basic IVs.
 */

STATIC void elimbasivs(register loop *l)
{ famlist *fl;
  register Iv *biv;
  register unsigned i;
  register tym_t ty;
  register elem **pref,*fofe,*C2;
  symbol *X;
  int refcount;

  cmes2("elimbasivs(%p)\n",l);
  for (biv = l->Livlist; biv; biv = biv->IVnext)	// for each basic IV
  {

	/* Can't eliminate this basic IV if we have a goal for the	*/
	/* increment elem.						*/
	// Be careful about Nflags being in a union...
	if (!((*biv->IVincr)->Nflags & NFLnogoal))
		continue;

	X = biv->IVbasic;
	assert(symbol_isintab(X));
	ty = X->ty();
	pref = onlyref(X,l,*biv->IVincr,&refcount);

	/* if only ref of X is of the form (X) or (X relop e) or (e relop X) */
	if (pref != NULL && refcount <= 1)
	{	elem *ref;
		tym_t flty;

		fl = biv->IVfamily;
		if (!fl)		// if no elems in family of biv
		    continue;

		ref = *pref;

		/* Replace (X) with (X != 0)				*/
		if (ref->Eoper == OPvar)
		    ref = *pref = el_bin(OPne,TYint,ref,el_long(ref->Ety,0L));

		fl = simfl(fl,ty);	/* find simplest elem in family	*/
		if (!fl)
		    continue;

		// Don't do the replacement if we would replace a
		// signed comparison with an unsigned one
		flty = fl->FLty;
		if (tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety))
		    flty = touns(flty);

		if (ref->Eoper >= OPle && ref->Eoper <= OPge &&
#if 1
		    !(tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety)) &&
		     tyuns(flty))
#else
		    (tyuns(ref->E1->Ety) | tyuns(ref->E2->Ety)) !=
		     tyuns(flty))
#endif
			continue;

		/* if we have (e relop X), replace it with (X relop e)	*/
		if (ref->E2->Eoper == OPvar && ref->E2->EV.sp.Vsym == X)
		{	register elem *tmp;

			tmp = ref->E2;
			ref->E2 = ref->E1;
			ref->E1 = tmp;
			ref->Eoper = swaprel(ref->Eoper);
		}

		// If e*c1+c2 would result in a sign change or an overflow
		// then we can't do it
		if (fl->c1->Eoper == OPconst)
		{   
#if LONGLONG
		    targ_llong c1;
#else
		    targ_long c1;
#endif
		    int sz;

		    c1 = el_tolong(fl->c1);
		    sz = tysize(ty);
		    if (sz == SHORTSIZE &&
			((ref->E2->Eoper == OPconst &&
			c1 * el_tolong(ref->E2) & ~0x7FFFL) ||
			 c1 & ~0x7FFFL)
		       )
			continue;

		    if (sz == LONGSIZE &&
			((ref->E2->Eoper == OPconst &&
			c1 * el_tolong(ref->E2) & ~0x7FFFFFFFL) ||
			 c1 & ~0x7FFFFFFFL)
		       )
			continue;
#if LONGLONG && __INTSIZE == 4
		    if (sz == LLONGSIZE &&
			((ref->E2->Eoper == OPconst &&
			c1 * el_tolong(ref->E2) & ~0x7FFFFFFFFFFFFFFFLL) ||
			 c1 & ~0x7FFFFFFFFFFFFFFFLL)
		       )
			continue;
#endif
		}

		elem *refE2 = el_copytree(ref->E2);
		int refEoper = ref->Eoper;

		/* if c1 < 0 and relop is < <= > >=
		   then adjust relop as if both sides were multiplied
		   by -1
		 */
		if (!tyuns(ty) &&
		    (tyintegral(ty) && el_tolong(fl->c1) < 0 ||
#if TARGET_68K && __POWERPC
		     tyfloating(ty) && Xlt(el_toldouble(fl->c1),Xzero()) ))
#else
		     tyfloating(ty) && el_toldouble(fl->c1) < 0.0))
#endif
			refEoper = swaprel(refEoper);

		/* Replace (X relop e) with (X relop (short)e)
		   if T is 1 word but e is 2
		 */
		if (tysize(flty) == SHORTSIZE &&
		    tysize(refE2->Ety) == LONGSIZE)
		    refE2 = el_una(OPlngsht,flty,refE2);

		/* replace e with e*c1 + c2		*/
		C2 = el_copytree(fl->c2);
		fofe = el_bin(OPadd,flty,
				el_bin(OPmul,refE2->Ety,
					refE2,
					el_copytree(fl->c1)),
				C2);
		fofe = doptelem(fofe,GOALvalue | GOALagain);	// fold any constants

		if (tyuns(flty) && refEoper == OPge &&
		    fofe->Eoper == OPconst && el_allbits(fofe, 0) &&
		    fl->c2->Eoper == OPconst && !el_allbits(fl->c2, 0))
		{
		    /* Don't do it if replacement will result in
		     * an unsigned T>=0 which will be an infinite loop.
		     */
		    el_free(fofe);
		    continue;
		}

		cmes2("Eliminating basic IV '%s'\n",X->Sident);

#ifdef DEBUG
		if (debugc)
		{   dbg_printf("Comparison replaced: ");
		    WReqn(ref);
		    dbg_printf(" with ");
		}
#endif

		el_free(ref->E2);
		ref->E2 = refE2;
		ref->Eoper = refEoper;

		elimass(*biv->IVincr);		// dump the increment elem

		// replace X with T
#if TX86
		assert(ref->E1->EV.sp.Voffset == 0);
				// can be set for 68K when cast to smaller size
#endif
		ref->E1->EV.sp.Vsym = fl->FLtemp;
		ref->E1->Ety = flty;
		ref->E2 = fofe;

		/* If sizes of expression worked out wrong...
		   Which can happen if we have (int)ptr==e
		 */
#if TX86 // DJB: Who's correct here?
		if (EBIN(fofe))		/* if didn't optimize it away	*/
#else
		if (EOP(fofe))		/* if didn't optimize it away	*/
#endif
		{   int sz;
		    tym_t ty,ty1,ty2;

		    ty = fofe->Ety;
		    sz = tysize(ty);
		    ty1 = fofe->E1->Ety;
		    ty2 = fofe->E2->Ety;
		    /* Sizes of + expression must all be the same	*/
		    if (sz != tysize(ty1) &&
			sz != tysize(ty2)
		       )
		    {
			if (tyuns(ty))		/* if unsigned comparison */
			    ty1 = touns(ty1);	/* to unsigned type	*/
			fofe->Ety = ty1;
			ref->E1->Ety = ty1;
		    }
		}

#if TX86
		/* Fix if leaves of compare are TYfptrs and the compare	*/
		/* operator is < <= > >=.				*/
		if (ref->Eoper >= OPle && ref->Eoper <= OPge &&
		    tyfv(ref->E1->Ety))
		{	assert(tyfv(ref->E2->Ety));
			ref->E1 = el_una(OPoffset,TYuint,ref->E1);
			ref->E2 = el_una(OPoffset,TYuint,fofe);
		}
#endif
#ifdef DEBUG
		if (debugc)
		{   WReqn(ref);
		    dbg_printf("\n");
		}
#endif

		changes++;
		doflow = TRUE;			/* redo flow analysis	*/

		/* if X is live on entry to any successor S outside loop */
		/*	prepend elem X=(T-c2)/c1 to S.Belem	*/

		foreach (i,dfotop,l->Lexit)	/* for each exit block	*/
		{	register elem *ne;
			register block *b;
			register list_t bl;

			for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
			{			/* for each successor	*/
				b = list_block(bl);
				if (vec_testbit(b->Bdfoidx,l->Lloop))
					continue;	/* inside loop	*/
				if (!vec_testbit(X->Ssymnum,b->Binlv))
					continue;	/* not live	*/

				C2 = el_copytree(fl->c2);
				ne = el_bin(OPmin,ty,
					el_var(fl->FLtemp),
					C2);
#if TX86
				if (tybasic(ne->E1->Ety) == TYfptr &&
				    tybasic(ne->E2->Ety) == TYfptr)
				{   ne->Ety = TYint;
				    if (tylong(ty) && intsize == 2)
					ne = el_una(OPshtlng,ty,ne);
				}
#endif

				ne = el_bin(OPeq,X->ty(),
					el_var(X),
					el_bin(OPdiv,ne->Ety,
					    ne,
					    el_copytree(fl->c1)));
#ifdef DEBUG
				if (debugc)
				{   dbg_printf("Adding (");
				    WReqn(ne);
				    dbg_printf(") to exit block B%d\n",b->Bdfoidx);
				    //elem_print(ne);
				}
#endif
				/* We have to add a new block if there is */
				/* more than one predecessor to b.	*/
				if (list_next(b->Bpred))
				{   block *bn;
				    register list_t bl2;

				    bn = block_calloc();
				    bn->Btry = b->Btry;
				    numblks++;
				    assert(numblks <= maxblks);
				    bn->BC = BCgoto;
				    bn->Bnext = dfo[i]->Bnext;
				    dfo[i]->Bnext = bn;
				    list_append(&(bn->Bsucc),b);
				    list_append(&(bn->Bpred),dfo[i]);
				    list_ptr(bl) = (void *)bn;
				    for (bl2 = b->Bpred; bl2;
					 bl2 = list_next(bl2))
					if (list_block(bl2) == dfo[i])
					{	list_ptr(bl2) = (void *)bn;
						goto L2;
					}
				    assert(0);
				L2:
				    b = bn;
				    addblk = TRUE;
				}

				if (b->Belem)
				    b->Belem =
					el_bin(OPcomma,b->Belem->Ety,
					    ne,b->Belem);
				else
				    b->Belem = ne;
				changes++;
				doflow = TRUE;	/* redo flow analysis	*/
			} /* for each successor */
		} /* foreach exit block */
		if (addblk)
			return;
	}
	else if (refcount == 0)			/* if no uses of IV in loop  */
	{	/* Eliminate the basic IV if it is not live on any successor */
		foreach (i,dfotop,l->Lexit)	/* for each exit block	     */
		{	register block *b;
			register list_t bl;

			for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
			{			/* for each successor	*/
				b = list_block(bl);
				if (vec_testbit(b->Bdfoidx,l->Lloop))
					continue;	/* inside loop	*/
				if (vec_testbit(X->Ssymnum,b->Binlv))
					goto L1;	/* live		*/
			}
		}

		cmes3("No uses, eliminating basic IV '%s' (%d)\n",(X->Sident)
			? (char *)X->Sident : "",X);

		/* Dump the increment elem				*/
		/* (Replace it with an OPconst that only serves as a	*/
		/* placeholder in the tree)				*/
		*(biv->IVincr) = el_selecte2(*(biv->IVincr));

		changes++;
		doflow = TRUE;			/* redo flow analysis	*/
	    L1:	;
	}
  } /* for */
}


/***********************
 * Eliminate opeq IVs that are not used outside the loop.
 */

STATIC void elimopeqs(register loop *l)
{   famlist *fl;
    Iv *biv;
    unsigned i;
    tym_t ty;
    elem **pref,*fofe,*C2;
    symbol *X;
    int refcount;

    cmes2("elimopeqs(%p)\n",l);
    for (biv = l->Lopeqlist; biv; biv = biv->IVnext)	// for each opeq IV
    {

	// Can't eliminate this basic IV if we have a goal for the
	// increment elem.
	// Be careful about Nflags being in a union...
	if (!((*biv->IVincr)->Nflags & NFLnogoal))
	    continue;

	X = biv->IVbasic;
	assert(symbol_isintab(X));
	ty = X->ty();
	pref = onlyref(X,l,*biv->IVincr,&refcount);

	// if only ref of X is of the form (X) or (X relop e) or (e relop X)
	if (pref != NULL && refcount <= 1)
	    ;
	else if (refcount == 0)			// if no uses of IV in loop
	{   // Eliminate the basic IV if it is not live on any successor
	    foreach (i,dfotop,l->Lexit)	// for each exit block
	    {   block *b;
		list_t bl;

		for (bl = dfo[i]->Bsucc; bl; bl = list_next(bl))
		{   // for each successor
		    b = list_block(bl);
		    if (vec_testbit(b->Bdfoidx,l->Lloop))
			continue;	// inside loop
		    if (vec_testbit(X->Ssymnum,b->Binlv))
			goto L1;	// live
		}
	    }

	    cmes3("No uses, eliminating opeq IV '%s' (%d)\n",(X->Sident)
		    ? (char *)X->Sident : "",X);

	    // Dump the increment elem
	    // (Replace it with an OPconst that only serves as a
	    // placeholder in the tree)
	    *(biv->IVincr) = el_selecte2(*(biv->IVincr));

	    changes++;
	    doflow = TRUE;			// redo flow analysis
	L1:	;
	}
    }
}

/**************************
 * Find simplest elem in family.
 * Input:
 *	tym	type of basic IV
 * Return NULL if none found.
 */

STATIC famlist * simfl(famlist *fl,tym_t tym)
{ famlist *sofar;

  assert(fl);
  sofar = NULL;
  for (; fl; fl = fl->FLnext)
  {
	if (fl->FLtemp == FLELIM)	/* no variable, so skip it	*/
	    continue;
	/* If size of replacement is less than size of biv, we could	*/
	/* be in trouble due to loss of precision.			*/
	if (size(fl->FLtemp->ty()) < size(tym))
	    continue;
	sofar = flcmp(sofar,fl);	/* pick simplest		*/
  }
  return sofar;
}

/**************************
 * Return simpler of two family elems.
 * There is room for improvement, namely if
 * 	f1.c1 = 2, f2.c1 = 27
 * then pick f1 because it is a shift.
 */

STATIC famlist * flcmp(famlist *f1,famlist *f2)
{   tym_t ty;
    union eve *t1,*t2;

    assert(f2);
    if (!f1)
	goto Lf2;
    t1 = &(f1->c1->EV);
    t2 = &(f2->c1->EV);
    ty = (*f1->FLpelem)->Ety;		/* type of elem			*/
#if 0
    printf("f1: c1 = %d, c2 = %d\n",t1->Vshort,f1->c2->EV.Vshort);
    printf("f2: c1 = %d, c2 = %d\n",t2->Vshort,f2->c2->EV.Vshort);
    WRTYxx((*f1->FLpelem)->Ety);
    WRTYxx((*f2->FLpelem)->Ety);
#endif
    /* Wimp out and just pick f1 if the types don't match		*/
    if (tysize(ty) == tysize((*f2->FLpelem)->Ety))
    {
	switch (tybasic(ty))
	{   case TYbool:
	    case TYchar:
	    case TYschar:
	    case TYuchar:
		if (t2->Vuchar == 1 ||
		    t1->Vuchar != 1 && f2->c2->EV.Vuchar == 0)
			goto Lf2;
		break;
	    case TYshort:
	    case TYushort:
	    case TYwchar_t:	// BUG: what about 4 byte wchar_t's?
#if TX86
#if JHANDLE
	    case TYjhandle:
#endif
	    case TYnptr:
	    case TYsptr:
	    case TYcptr:
#endif
	    case_short:
		if (t2->Vshort == 1 ||
		    t1->Vshort != 1 && f2->c2->EV.Vshort == 0)
			goto Lf2;
		break;

	    case TYint:
	    case TYuint:
		if (intsize == SHORTSIZE)
		    goto case_short;
		else
		    goto case_long;
	    case TYlong:
	    case TYulong:
	    case TYfptr:
	    case TYvptr:
#if TX86
	    case TYhptr:
#endif
	    case_long:
		if (t2->Vlong == 1 ||
		    t1->Vlong != 1 && f2->c2->EV.Vlong == 0)
			goto Lf2;
		break;
	    case TYfloat:
		if (t2->Vfloat == 1 ||
		    t1->Vfloat != 1 && f2->c2->EV.Vfloat == 0)
			goto Lf2;
		break;
	    case TYdouble:
	    case TYdouble_alias:
		if (t2->Vdouble == 1.0 ||
		    t1->Vdouble != 1.0 && f2->c2->EV.Vdouble == 0)
			goto Lf2;
		break;
	    case TYldouble:
#if TARGET_68K && __POWERPC
		if (Xis1(t2->Vldouble) ||
		    !(Xis1(t2->Vldouble)) && Xis0(f2->c2->EV.Vldouble) )
#else
		if (t2->Vldouble == 1.0 ||
		    t1->Vldouble != 1.0 && f2->c2->EV.Vldouble == 0)
#endif
			goto Lf2;
		break;
	    default:
		assert(0);
	}
    }
    //printf("picking f1\n");
    return f1;

Lf2:
    //printf("picking f2\n");
    return f2;
}

/************************************
 * Input:
 *	x	basic IV symbol
 *	incn	increment elem for basic IV X.
 * Output:
 *	*prefcount	# of references to X other than the increment elem
 * Returns:
 *	If ref of X in loop l is of the form (X relop e) or (e relop X)
 *		Return the relop elem
 *	Else
 *		Return NULL
 */

static int count;
static elem **nd,*sincn;
static symbol *X;

STATIC elem ** onlyref(symbol *x,loop *l,elem *incn,int *prefcount)
{ register unsigned i;

  //printf("onlyref('%s')\n", x->Sident);
  X = x;				/* save some parameter passing	*/
  assert(symbol_isintab(x));
  sincn = incn;
#ifdef DEBUG
  if (!(X->Ssymnum < globsym.top && l && incn))
	dbg_printf("X = %d, globsym.top = %d, l = x%lx, incn = x%lx\n",X->Ssymnum,globsym.top,l,incn);
#endif
  assert(X->Ssymnum < globsym.top && l && incn);
  count = 0;
  nd = NULL;
  foreach (i,dfotop,l->Lloop)		/* for each block in loop	*/
  {	block *b;

	b = dfo[i];
	if (b->Belem)
	{
	    countrefs(&b->Belem,b->BC == BCiftrue);
	}
  }
#if 0
  dbg_printf("count = %d, nd = (");
  if (nd) WReqn(*nd);
  dbg_printf(")\n");
#endif
  *prefcount = count;
  return nd;
}

/******************************
 * Count elems of the form (X relop e) or (e relop X).
 * Do not count the node if it is the increment node (sincn).
 * Input:
 *	flag:	TRUE if block wants to test the elem
 */

STATIC void countrefs(register elem **pn,bool flag)
{ elem *n = *pn;

  assert(n);
  if (n == sincn)			/* if it is the increment elem	*/
  {
	if (OTbinary(n->Eoper))
	    countrefs(&n->E2, FALSE);
	return;				// don't count lvalue
  }
  if (OTunary(n->Eoper))
	countrefs(&n->E1,FALSE);
  else if (OTbinary(n->Eoper))
  {
	if (OTrel(n->Eoper))
	{	elem *e1 = n->E1;

		assert(e1->Eoper != OPcomma);
		if (e1 == sincn &&
		    (e1->Eoper == OPeq || OTopeq(e1->Eoper)))
		    goto L1;

		/* Check both subtrees to see if n is the comparison node,
		 * that is, if X is a leaf of the comparison.
		 */
		if (e1->Eoper == OPvar && e1->EV.sp.Vsym == X && !countrefs2(n->E2) ||
#if TX86
		    n->E2->Eoper == OPvar && n->E2->EV.sp.Vsym == X && !countrefs2(e1))
#else
		    n->E2->Eoper == OPvar && n->E2->EV.sp.Vsym == X)
#endif
			nd = pn;		/* found the relop node	*/
	}
    L1:
	countrefs(&n->E1,FALSE);
	countrefs(&n->E2,(flag && n->Eoper == OPcomma));
  }
  else if ((n->Eoper == OPvar || n->Eoper == OPrelconst) && n->EV.sp.Vsym == X)
  {	if (flag)
	    nd = pn;			/* comparing it with 0		*/
	count++;			/* found another reference	*/
  }
}

/*******************************
 * Count number of times symbol X appears in elem tree e.
 */

STATIC int countrefs2(elem *e)
{
    elem_debug(e);
    while (OTunary(e->Eoper))
	e = e->E1;
    if (OTbinary(e->Eoper))
	return countrefs2(e->E1) + countrefs2(e->E2);
    return ((e->Eoper == OPvar || e->Eoper == OPrelconst) &&
	    e->EV.sp.Vsym == X);
}

/****************************
 * Eliminate some special cases.
 */

STATIC void elimspec(loop *l)
{ register unsigned i;

  foreach (i,dfotop,l->Lloop)		/* for each block in loop	*/
  {	block *b;

	b = dfo[i];
	if (b->Belem)
	    elimspecwalk(&b->Belem);
  }
}

/******************************
 */

STATIC void elimspecwalk(elem **pn)
{ elem *n;

  n = *pn;
  assert(n);
  if (OTunary(n->Eoper))
	elimspecwalk(&n->E1);
  else if (OTbinary(n->Eoper))
  {
	elimspecwalk(&n->E1);
	elimspecwalk(&n->E2);
	if (OTrel(n->Eoper))
	{	elem *e1 = n->E1;

		/* Replace ((e1,e2) rel e3) with (e1,(e2 rel e3).
		 * This will reduce the number of cases for elimbasivs().
		 * Don't do equivalent with (e1 rel (e2,e3)) because
		 * of potential side effects in e1.
		 */
		if (e1->Eoper == OPcomma)
		{	elem *e;

#ifdef DEBUG
			if (debugc)
			{   dbg_printf("3rewriting ("); WReqn(n); dbg_printf(")\n"); }
#endif
			e = n->E2;
			n->E2 = e1;
			n->E1 = n->E2->E1;
			n->E2->E1 = n->E2->E2;
			n->E2->E2 = e;
			n->E2->Eoper = n->Eoper;
			n->E2->Ety = n->Ety;
			n->Eoper = OPcomma;

			changes++;
			doflow = TRUE;

			elimspecwalk(&n->E1);
			elimspecwalk(&n->E2);
		}

		/* Rewrite ((X op= e2) rel e3) into ((X op= e2),(X rel e3))
		 * Rewrite ((X ++  e2) rel e3) into ((X +=  e2),(X-e2 rel e3))
		 * so that the op= will not have a goal, so elimbasivs()
		 * will work on it.
		 */
		if ((OTopeq(e1->Eoper)
		     || OTpost(e1->Eoper)
		    ) &&
		    !el_sideeffect(e1->E1))
		{	elem *e;
			int op;
#ifdef DEBUG
			if (debugc)
			{ dbg_printf("4rewriting ("); WReqn(n); dbg_printf(")\n"); }
#endif
			e = el_calloc();
			el_copy(e,n);
			e->E1 = el_copytree(e1->E1);
			e->E1->Ety = n->E1->Ety;
			n->E2 = e;
			switch (e1->Eoper)
			{   case OPpostinc:
				e1->Eoper = OPaddass;
				op = OPmin;
				goto L3;
			    case OPpostdec:
				e1->Eoper = OPminass;
				op = OPadd;
			    L3:	e->E1 = el_bin(op,e->E1->Ety,e->E1,el_copytree(e1->E2));
				break;

			}
			/* increment node is now guaranteed to have no goal */
			e1->Nflags |= NFLnogoal;
			n->Eoper = OPcomma;
			//changes++;
			doflow = TRUE;

			elimspecwalk(&n->E1);
			elimspecwalk(&n->E2);
		}
	}
  }
}

#endif
